From e09c0022a102ea2f61acebad9a456ca7290f73c5 Mon Sep 17 00:00:00 2001 From: Marco Realacci Date: Wed, 15 Jan 2025 03:35:13 +0100 Subject: [PATCH] add fds AI generated questions --- data/questions/fds_llm.json | 6749 +++++++++++++++++++++++++++++++++++ 1 file changed, 6749 insertions(+) create mode 100644 data/questions/fds_llm.json diff --git a/data/questions/fds_llm.json b/data/questions/fds_llm.json new file mode 100644 index 0000000..c5f23c0 --- /dev/null +++ b/data/questions/fds_llm.json @@ -0,0 +1,6749 @@ +[ + { + "quest": "Autoencoders: What is the purpose of the hidden layer in an autoencoder, which typically has a smaller dimensionality than the input layer?", + "answers": [ + { + "text": "To expand the input data to a higher-dimensional space.", + "image": "" + }, + { + "text": "To learn a compressed and efficient representation of the input data.", + "image": "" + }, + { + "text": "To apply a non-linear transformation to the input.", + "image": "" + }, + { + "text": "To add noise to the input data for robustness.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, why is the constraint of a smaller hidden layer important in an autoencoder?", + "answers": [ + { + "text": "It allows the network to memorize the input.", + "image": "" + }, + { + "text": "It forces the network to learn meaningful patterns and structures.", + "image": "" + }, + { + "text": "It speeds up the training process.", + "image": "" + }, + { + "text": "It prevents overfitting.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: Which of the following is NOT a typical use case for autoencoders mentioned in the source?", + "answers": [ + { + "text": "Dimensionality reduction for visualization.", + "image": "" + }, + { + "text": "Data compression.", + "image": "" + }, + { + "text": "Feature learning for downstream tasks.", + "image": "" + }, + { + "text": "Supervised classification with labeled data.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: What does the source say about using autoencoders with unlabeled data?", + "answers": [ + { + "text": "They are not suitable for unlabeled data.", + "image": "" + }, + { + "text": "They require labeled data to extract useful patterns.", + "image": "" + }, + { + "text": "They can leverage large amounts of unlabeled data to extract useful patterns.", + "image": "" + }, + { + "text": "They are only useful for supervised learning tasks", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: In a linear autoencoder, what is the relationship between the input data (x) and the reconstructed output (x̂), according to the source?", + "answers": [ + { + "text": "x̂ = VxU", + "image": "" + }, + { + "text": "x̂ = UVx", + "image": "" + }, + { + "text": "x̂ = x²", + "image": "" + }, + { + "text": "x̂ = x + noise", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the encoded representation of x in a linear autoencoder?", + "answers": [ + { + "text": "UVx", + "image": "" + }, + { + "text": "x̂", + "image": "" + }, + { + "text": "Vx", + "image": "" + }, + { + "text": "CCTx", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, what is the main drawback of a linear autoencoder?", + "answers": [ + { + "text": "It cannot be used for dimensionality reduction.", + "image": "" + }, + { + "text": "It performs 
poorly with non-linear data.", + "image": "" + }, + { + "text": "It requires extensive computational resources.", + "image": "" + }, + { + "text": "The mapping is linear, limiting its ability to capture complex non-linear data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: In the context of the source, what is Principal Component Analysis (PCA) equivalent to?", + "answers": [ + { + "text": "A deep nonlinear autoencoder.", + "image": "" + }, + { + "text": "A sparse autoencoder.", + "image": "" + }, + { + "text": "A linear autoencoder with a single hidden layer and linear activation functions.", + "image": "" + }, + { + "text": "A denoising autoencoder.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What are Eigenfaces, as described in the source?", + "answers": [ + { + "text": "A specific type of autoencoder architecture.", + "image": "" + }, + { + "text": "Randomly generated facial images.", + "image": "" + }, + { + "text": "The principal components (or eigenvectors) of a large set of facial images.", + "image": "" + }, + { + "text": "Non-linear transformations of facial images.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, what does the value of the eigenvalues in PCA represent?", + "answers": [ + { + "text": "The amount of noise present in the image.", + "image": "" + }, + { + "text": "The size of the dataset.", + "image": "" + }, + { + "text": "The amount of variance each eigenface captures.", + "image": "" + }, + { + "text": "The total number of faces in the dataset.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: How are the weights (or coefficients) for each eigenface found, according to the source?", + "answers": [ + { + "text": "By random initialization.", + "image": "" + }, + { + "text": "By applying a non-linear activation function.", + "image": "" + }, + { + "text": "By projecting the original face onto the eigenfaces.", + "image": "" + }, + { + "text": "By calculating the mean of the image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: Which of the following is a limitation of using eigenfaces for face recognition as mentioned in the source?", + "answers": [ + { + "text": "They are not sensitive to facial expressions.", + "image": "" + }, + { + "text": "They are highly effective with any data set", + "image": "" + }, + { + "text": "They can capture any non-linear variation of a face", + "image": "" + }, + { + "text": "They are sensitive to variations in lighting, pose, and facial expressions.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: What distinguishes a deep autoencoder from a linear autoencoder?", + "answers": [ + { + "text": "A deep autoencoder uses a single hidden layer with linear activation.", + "image": "" + }, + { + "text": "A deep autoencoder projects data onto a non-linear manifold, instead of a subspace.", + "image": "" + }, + { + "text": "A deep autoencoder is only useful for supervised learning tasks", + "image": "" + }, + { + "text": "A deep autoencoder uses only labeled data", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the key characteristic of an undercomplete autoencoder?", + "answers": [ + { + "text": "The hidden layer has more units than the input layer.", + "image": "" + }, + { + "text": "The embedded space has a lower 
dimensionality than the input space.", + "image": "" + }, + { + "text": "The model overfits to the training data.", + "image": "" + }, + { + "text": "The hidden layer uses non-linear activation.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the main challenge with overcomplete autoencoders?", + "answers": [ + { + "text": "They are difficult to train.", + "image": "" + }, + { + "text": "They perform poorly with complex data.", + "image": "" + }, + { + "text": "Without proper constraints, they can overfit by simply copying the input to the output.", + "image": "" + }, + { + "text": "They cannot capture the data's hidden structure.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What is a stacked autoencoder?", + "answers": [ + { + "text": "An autoencoder with a very small hidden layer.", + "image": "" + }, + { + "text": "An autoencoder consisting of multiple encoding and decoding layers.", + "image": "" + }, + { + "text": "An autoencoder that can only learn linear features.", + "image": "" + }, + { + "text": "An autoencoder that operates exclusively on labeled data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, what does the layer-wise training of a stacked autoencoder achieve?", + "answers": [ + { + "text": "It leads to overfitting.", + "image": "" + }, + { + "text": "It makes training more complex.", + "image": "" + }, + { + "text": "It simplifies optimization and ensures meaningful feature learning at each step.", + "image": "" + }, + { + "text": "It eliminates the need for pre-training.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What is a denoising autoencoder (DAE) designed to do?", + "answers": [ + { + "text": "To compress data without loss.", + "image": "" + }, + { + "text": "To learn robust, noise-resistant representations by reconstructing clean data from noisy inputs.", + "image": "" + }, + { + "text": "To reduce the dimensionality of the input data.", + "image": "" + }, + { + "text": "To only use clean data as input.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: How does a denoising autoencoder achieve noise-resistance?", + "answers": [ + { + "text": "By reducing the size of the hidden layer.", + "image": "" + }, + { + "text": "By training on noisy data and reconstructing the clean version.", + "image": "" + }, + { + "text": "By adding random noise to the output.", + "image": "" + }, + { + "text": "By adding labels to the input data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is a key characteristic of a sparse autoencoder?", + "answers": [ + { + "text": "The hidden layer has fewer units than the input.", + "image": "" + }, + { + "text": "Most hidden units should have zero activation.", + "image": "" + }, + { + "text": "It uses only labeled data.", + "image": "" + }, + { + "text": "It maps the data onto a linear subspace.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the purpose of the sparsity penalty in a sparse autoencoder?", + "answers": [ + { + "text": "To increase the complexity of the model.", + "image": "" + }, + { + "text": "To encourage most hidden units to be inactive.", + "image": "" + }, + { + "text": "To increase the amount of noise", + "image": "" + }, + { + "text": "To reduce the reconstruction error to zero.", + 
"image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, what does the Kullback-Leibler (KL) divergence measure in the context of sparse autoencoders?", + "answers": [ + { + "text": "The reconstruction error.", + "image": "" + }, + { + "text": "The amount of noise in the input data.", + "image": "" + }, + { + "text": "The difference between the average activation of hidden units and a target sparsity value.", + "image": "" + }, + { + "text": "The dimensionality of the latent space.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: a) Classify data into predefined categories.", + "answers": [ + { + "text": "Classify data into predefined categories.", + "image": "" + }, + { + "text": "Reconstruct its input by predicting an approximation.", + "image": "" + }, + { + "text": "Generate new data samples similar to the input.", + "image": "" + }, + { + "text": "Reduce the dimensionality of the data for visualization purposes only.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: The 'bottleneck' in an autoencoder architecture refers to:", + "answers": [ + { + "text": "The activation function used in the output layer.", + "image": "" + }, + { + "text": "The hidden layer with a significantly smaller dimensionality than the input.", + "image": "" + }, + { + "text": "The initial weight matrices of the encoder and decoder.", + "image": "" + }, + { + "text": "The loss function that the network tries to minimize.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the primary purpose of the dimensionality constraint in autoencoders?", + "answers": [ + { + "text": "To increase the computational speed of the network.", + "image": "" + }, + { + "text": "To force the network to learn meaningful patterns and structures in the data instead of memorising the input.", + "image": "" + }, + { + "text": "To allow the network to handle high dimensional input data.", + "image": "" + }, + { + "text": "To reduce the risk of overfitting.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: Which of the following is NOT a typical application of autoencoders?", + "answers": [ + { + "text": "Data compression.", + "image": "" + }, + { + "text": "Feature learning for downstream tasks.", + "image": "" + }, + { + "text": "Supervised classification with labelled data.", + "image": "" + }, + { + "text": "Dimensionality reduction for visualisation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: A linear autoencoder, which uses linear activation functions, is functionally equivalent to:", + "answers": [ + { + "text": "A deep neural network.", + "image": "" + }, + { + "text": "Principal Component Analysis (PCA).", + "image": "" + }, + { + "text": "A convolutional neural network.", + "image": "" + }, + { + "text": "A recurrent neural network.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: In a linear autoencoder, the reconstruction of the input is computed as a linear transformation expressed as:", + "answers": [ + { + "text": "x̂ = U + V + x", + "image": "" + }, + { + "text": "x̂ = UVx", + "image": "" + }, + { + "text": "x̂ = U * V * x", + "image": "" + }, + { + "text": "x̂ = V / U * x", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, a linear autoencoder learns to 
choose a subspace that:", + "answers": [ + { + "text": "Maximises the distance of the data points from the projections.", + "image": "" + }, + { + "text": "Minimises the variance of the projections.", + "image": "" + }, + { + "text": "Minimises the squared distance from the data to the projections and maximises the variance of the projections.", + "image": "" + }, + { + "text": "Is randomly generated each training cycle.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: Eigenfaces are:", + "answers": [ + { + "text": "The result of applying non-linear transformations to faces.", + "image": "" + }, + { + "text": "The principal components (or eigenvectors) of a large set of facial images.", + "image": "" + }, + { + "text": "A set of facial images created by averaging the original dataset.", + "image": "" + }, + { + "text": "A specific type of neural network.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the purpose of centering the data (subtracting the mean) before performing PCA on a set of face images?", + "answers": [ + { + "text": "To increase the variance of the data", + "image": "" + }, + { + "text": "To reduce the noise in the data", + "image": "" + }, + { + "text": "To standardise the range of pixel values", + "image": "" + }, + { + "text": "To ensure that the data is centered around zero", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: When recognising faces using eigenfaces, each face is represented as:", + "answers": [ + { + "text": "A new image composed from a random set of pixels", + "image": "" + }, + { + "text": "A set of binary digits", + "image": "" + }, + { + "text": "A compressed image that has a lower resolution", + "image": "" + }, + { + "text": "A weighted sum of eigenfaces", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: Which of the following is a limitation of using eigenfaces for facial recognition?", + "answers": [ + { + "text": "Their high computational complexity.", + "image": "" + }, + { + "text": "Their inability to generalize to different people.", + "image": "" + }, + { + "text": "Their effectiveness is not dependent on the training data quality.", + "image": "" + }, + { + "text": "Their sensitivity to variations in lighting, pose, and facial expressions.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: Deep autoencoders project the data onto a:", + "answers": [ + { + "text": "Linear subspace", + "image": "" + }, + { + "text": "Nonlinear manifold", + "image": "" + }, + { + "text": "Randomly generated vector space", + "image": "" + }, + { + "text": "Discrete set of points", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: An undercomplete autoencoder is characterized by:", + "answers": [ + { + "text": "A hidden layer larger than the input layer", + "image": "" + }, + { + "text": "The absence of regularization techniques", + "image": "" + }, + { + "text": "A hidden layer smaller than the input layer", + "image": "" + }, + { + "text": "A tendency to overfit the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What is the main challenge associated with overcomplete autoencoders?", + "answers": [ + { + "text": "Difficulty in learning any meaningful features.", + "image": "" + }, + { + "text": "Tendency to underfit the data due to low capacity.", + "image": "" + }, + { 
+ "text": "Overfitting by simply copying the input to the output", + "image": "" + }, + { + "text": "Difficulty with using regularization techniques", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: A stacked autoencoder introduces which property to the network?", + "answers": [ + { + "text": "Linear transformations", + "image": "" + }, + { + "text": "Reduced computational costs", + "image": "" + }, + { + "text": "Hierarchical representations of the input data", + "image": "" + }, + { + "text": "The ability to only use labelled training data", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: In the simplified training of a stacked autoencoder:", + "answers": [ + { + "text": "All layers are trained simultaneously", + "image": "" + }, + { + "text": "The layers are only trained with labelled data", + "image": "" + }, + { + "text": "The output of one layer is used as input to train the next", + "image": "" + }, + { + "text": "All layers share the same weight matrices.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What is a key feature of denoising autoencoders (DAE)?", + "answers": [ + { + "text": "They only use labelled training data.", + "image": "" + }, + { + "text": "They are trained to generate new data", + "image": "" + }, + { + "text": "They are trained to directly reconstruct the input without using any noise", + "image": "" + }, + { + "text": "They are trained to reconstruct clean data from noisy inputs", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: Sparse autoencoders enforce sparsity by:", + "answers": [ + { + "text": "Ensuring that all hidden units have non-zero activation.", + "image": "" + }, + { + "text": "Removing some hidden units during training.", + "image": "" + }, + { + "text": "Reducing the size of the hidden layer.", + "image": "" + }, + { + "text": "Encouraging most hidden units to have zero activation", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: The Kullback-Leibler (KL) divergence in sparse autoencoders is used to:", + "answers": [ + { + "text": "Minimise the reconstruction error", + "image": "" + }, + { + "text": "Match the average activation of hidden units to a target sparsity value", + "image": "" + }, + { + "text": "Maximise the number of active hidden units", + "image": "" + }, + { + "text": "Increase the dimensionality of the hidden layer", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What are the weights in the context of face representation using eigenfaces?", + "answers": [ + { + "text": "The pixels that represent the image", + "image": "" + }, + { + "text": "The coefficients found by projecting the original face onto the eigenfaces", + "image": "" + }, + { + "text": "The eigenvectors of the faces", + "image": "" + }, + { + "text": "The eigenfaces themselves", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: According to the source, the joint probability of observed data 'x' and latent variable 'z' in a VAE is expressed as:", + "answers": [ + { + "text": "p(x,z) = p(x|z) + p(z)", + "image": "" + }, + { + "text": "p(x,z) = p(x|z)p(z)", + "image": "" + }, + { + "text": "p(x,z) = p(z|x)p(x)", + "image": "" + }, + { + "text": "p(x,z) = p(x) / p(z)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The term 'p(x|z)' in the VAE framework represents:", + 
"answers": [ + { + "text": "The prior distribution of the latent variable.", + "image": "" + }, + { + "text": "The likelihood of observing 'x' given a specific value of 'z'.", + "image": "" + }, + { + "text": "The posterior distribution of 'z' given 'x'.", + "image": "" + }, + { + "text": "The marginal distribution of 'x'.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: In the VAE model, the probability of the observed data 'x' is obtained by:", + "answers": [ + { + "text": "Maximizing p(x|z) over all possible values of 'z'.", + "image": "" + }, + { + "text": "Integrating p(x|z)p(z) over all possible values of 'z'.", + "image": "" + }, + { + "text": "Calculating the product of p(x|z) and p(z).", + "image": "" + }, + { + "text": "Minimizing the squared difference between 'x' and its reconstruction.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: According to the source, the encoder in a VAE aims to approximate:", + "answers": [ + { + "text": "p(x|z)", + "image": "" + }, + { + "text": "p(z|x)", + "image": "" + }, + { + "text": "p(z)", + "image": "" + }, + { + "text": "p(x)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The decoder network in a VAE is parameterised by:", + "answers": [ + { + "text": "q(z|x)", + "image": "" + }, + { + "text": "p(x|z)", + "image": "" + }, + { + "text": "p(z)", + "image": "" + }, + { + "text": "p(x)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The encoder in the VAE is denoted as:", + "answers": [ + { + "text": "p(x|z)", + "image": "" + }, + { + "text": "q(z|x)", + "image": "" + }, + { + "text": "p(z)", + "image": "" + }, + { + "text": "p(x)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: In a VAE, the Evidence Lower Bound (ELBO) is introduced as a:", + "answers": [ + { + "text": "Direct computation of the marginal likelihood p(x).", + "image": "" + }, + { + "text": "Tractable approximation to the log-likelihood of the observed data.", + "image": "" + }, + { + "text": "A method to calculate the exact posterior p(z|x).", + "image": "" + }, + { + "text": "A way to eliminate the need for integration over the latent space.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The ELBO is represented by the expression:", + "answers": [ + { + "text": "E[log p(x|z)] + D_KL(q(z|x)||p(z))", + "image": "" + }, + { + "text": "E[log p(x|z)] - D_KL(q(z|x)||p(z))", + "image": "" + }, + { + "text": "E[log q(z|x)] - D_KL(p(z|x)||q(z))", + "image": "" + }, + { + "text": "E[log p(z|x)] + D_KL(p(z|x)||p(z))", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The first term in the ELBO, E[log p(x|z)], represents:", + "answers": [ + { + "text": "The regularization term of the model.", + "image": "" + }, + { + "text": "The distribution of the latent variables", + "image": "" + }, + { + "text": "The reconstruction accuracy of the decoder.", + "image": "" + }, + { + "text": "The deviation of the posterior from the prior.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "VAE: The second term in the ELBO, D_KL(q(z|x)||p(z)), represents:", + "answers": [ + { + "text": "The Kullback-Leibler divergence between the approximate posterior and the prior.", + "image": "" + }, + { + "text": "The cross-entropy between the input and output.", + "image": "" + }, + { + "text": "The variance of the latent space.", + "image": "" + }, + { + "text": "The 
reconstruction accuracy of the encoder.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "VAE: What is the purpose of the KL divergence term in the VAE loss function?", + "answers": [ + { + "text": "To increase the reconstruction error.", + "image": "" + }, + { + "text": "To force the approximate posterior distribution of the latent variables to be close to the prior distribution.", + "image": "" + }, + { + "text": "To make the latent variables deterministic.", + "image": "" + }, + { + "text": "To reduce the computational complexity.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The source mentions that if q(z|x) is equal to p(z) then:", + "answers": [ + { + "text": "The model will underfit the data", + "image": "" + }, + { + "text": "The ELBO will be equal to log(p(x))", + "image": "" + }, + { + "text": "The reconstruction of x will have a lot of noise", + "image": "" + }, + { + "text": "The KL-divergence term will be infinite", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "VAE: According to the source, the approximate posterior q(z|x) is often assumed to be a:", + "answers": [ + { + "text": "Uniform distribution", + "image": "" + }, + { + "text": "Bernoulli distribution", + "image": "" + }, + { + "text": "Gaussian distribution", + "image": "" + }, + { + "text": "Poisson distribution", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "VAE: In a VAE, if we assume that q(z|x) is a Gaussian distribution, then what are the two outputs of the encoder network?", + "answers": [ + { + "text": "The mean and the variance of p(x|z)", + "image": "" + }, + { + "text": "The mean and the variance of p(z)", + "image": "" + }, + { + "text": "The mean and the variance of q(z|x)", + "image": "" + }, + { + "text": "The reconstruction and the original input", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "VAE: The reparameterisation trick in VAEs allows us to:", + "answers": [ + { + "text": "Calculate the reconstruction error without using samples from the latent distribution.", + "image": "" + }, + { + "text": "Backpropagate through the sampling process of the latent variable z.", + "image": "" + }, + { + "text": "Directly use the mean and variance of q(z|x) during training.", + "image": "" + }, + { + "text": "Calculate the KL divergence without the need for approximations.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The reparameterisation trick expresses the latent variable ‘z’ as:", + "answers": [ + { + "text": "z = μ + σ", + "image": "" + }, + { + "text": "z = μ + σ ⊙ ε, where ε ~ N(0,I)", + "image": "" + }, + { + "text": "z = μ * σ * ε", + "image": "" + }, + { + "text": "z = μ / σ + ε", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: In the reparameterisation trick the random variable ε is drawn from:", + "answers": [ + { + "text": "A uniform distribution", + "image": "" + }, + { + "text": "A standard normal distribution", + "image": "" + }, + { + "text": "The posterior distribution q(z|x)", + "image": "" + }, + { + "text": "The prior distribution p(z)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: What are the two key requirements for probabilities when using the Softmax classifier?", + "answers": [ + { + "text": "They must be less than zero and sum to one.", + "image": "" + }, + { + "text": "They must be greater than or equal to zero and not sum to 
one.", + "image": "" + }, + { + "text": "They must be greater than or equal to zero and sum to greater than one.", + "image": "" + }, + { + "text": "They must be greater than or equal to zero and sum to one", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: In the context of training a classifier, what does Maximum Likelihood Estimation aim to do?", + "answers": [ + { + "text": "To minimise the likelihood of the observed data.", + "image": "" + }, + { + "text": "To choose weights to maximise the likelihood of the observed data.", + "image": "" + }, + { + "text": "To calculate the cross-entropy loss.", + "image": "" + }, + { + "text": "To regularise the model.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: Cross-entropy is described as the sum of two components. Which of the following correctly identifies these components?", + "answers": [ + { + "text": "Entropy and regularisation.", + "image": "" + }, + { + "text": "Entropy and model complexity.", + "image": "" + }, + { + "text": "Entropy and KL-divergence.", + "image": "" + }, + { + "text": "KL-divergence and regularisation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What is the primary effect of L2 regularization on the weights of a model?", + "answers": [ + { + "text": "It increases the magnitude of the weights.", + "image": "" + }, + { + "text": "It \"spreads out\" the weights.", + "image": "" + }, + { + "text": "It makes the model more complex.", + "image": "" + }, + { + "text": "It has no effect on the weights.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: When using gradient descent to find the best weights (W), what is the relationship between the data loss and the regularization term?", + "answers": [ + { + "text": "They are independent of each other", + "image": "" + }, + { + "text": "They are used separately, data loss first then regularization", + "image": "" + }, + { + "text": "Data loss is applied only if the regularization loss is too high", + "image": "" + }, + { + "text": "They are combined.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: Why is deriving gradients on paper considered a bad idea for complex models?", + "answers": [ + { + "text": "It's too simple for non-linear functions", + "image": "" + }, + { + "text": "It is very tedious, requires lots of matrix calculus and needs to be re-derived if loss changes.", + "image": "" + }, + { + "text": "It only works for linear score functions.", + "image": "" + }, + { + "text": "It is not possible.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: In the context of backpropagation, what are the two gradients that are multiplied using the chain rule?", + "answers": [ + { + "text": "Upstream and downstream gradients", + "image": "" + }, + { + "text": "Upstream and local gradients.", + "image": "" + }, + { + "text": "Input and output gradients", + "image": "" + }, + { + "text": "Weight and bias gradients", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: In a computational graph, what is the behavior of the \"add gate\" regarding gradients?", + "answers": [ + { + "text": "It swaps multipliers.", + "image": "" + }, + { + "text": "It adds gradients.", + "image": "" + }, + { + "text": "It distributes gradients.", + "image": "" + }, + { + "text": "It routes gradients.", + 
"image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In a computational graph, what is the behavior of the \"mul gate\" regarding gradients?", + "answers": [ + { + "text": "It adds gradients.", + "image": "" + }, + { + "text": "It distributes gradients", + "image": "" + }, + { + "text": "It \"swaps multiplier\".", + "image": "" + }, + { + "text": "It routes gradiens", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In a computational graph, what is the behavior of the \"copy gate\" regarding gradients?", + "answers": [ + { + "text": "It swaps multipliers.", + "image": "" + }, + { + "text": "It distributes gradients", + "image": "" + }, + { + "text": "It adds gradients.", + "image": "" + }, + { + "text": "It routes gradients.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In a computational graph, what is the behaviour of the \"max gate\" regarding gradients?", + "answers": [ + { + "text": "It swaps multipliers.", + "image": "" + }, + { + "text": "It distributes gradients", + "image": "" + }, + { + "text": "It adds gradients.", + "image": "" + }, + { + "text": "It routes gradients.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: In a modularized implementation of backpropagation, what is a key function of a gate/node/function object?", + "answers": [ + { + "text": "To only compute the result of an operation.", + "image": "" + }, + { + "text": "To only calculate upstream gradient", + "image": "" + }, + { + "text": "To cache some values for use in backward pass and multiply upstream and local gradients.", + "image": "" + }, + { + "text": "To apply the chain rule on forward pass.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: When dealing with vector derivatives, what is the derivative called when mapping from a vector to a scalar?", + "answers": [ + { + "text": "Jacobian", + "image": "" + }, + { + "text": "Hessian", + "image": "" + }, + { + "text": "Gradient", + "image": "" + }, + { + "text": "Laplacian", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: When dealing with vector derivatives, what is the derivative called when mapping from a vector to a vector?", + "answers": [ + { + "text": "Gradient", + "image": "" + }, + { + "text": "Hessian", + "image": "" + }, + { + "text": "Jacobian", + "image": "" + }, + { + "text": "Laplacian", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In backpropagation with vectors, what is the loss (L) considered to be?", + "answers": [ + { + "text": "A vector.", + "image": "" + }, + { + "text": "A matrix.", + "image": "" + }, + { + "text": "A tensor.", + "image": "" + }, + { + "text": "A scalar.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: In backpropagation with vectors, what is the relationship between the shape of dL/dx and x?", + "answers": [ + { + "text": "dL/dx is always larger than x.", + "image": "" + }, + { + "text": "dL/dx is always smaller than x.", + "image": "" + }, + { + "text": "dL/dx always has the same shape as x.", + "image": "" + }, + { + "text": "Their shapes are unrelated.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: When backpropagating through a matrix multiplication, what is the primary challenge in dealing with Jacobians?", + "answers": [ + { + 
"text": "They are difficult to compute", + "image": "" + }, + { + "text": "They are always sparse", + "image": "" + }, + { + "text": "They are always dense", + "image": "" + }, + { + "text": "They take too much memory, so we must work with them implicitly", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: When performing backpropagation with matrices, what does an element of X affect in the output Y (considerando che Y = X x W)?", + "answers": [ + { + "text": "Only one element of y.", + "image": "" + }, + { + "text": "Only one column of y.", + "image": "" + }, + { + "text": "The whole row of y.", + "image": "" + }, + { + "text": "It doesn't affect y.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What is the core principle of backpropagation?", + "answers": [ + { + "text": "To optimise the loss function directly", + "image": "" + }, + { + "text": "To compute gradients on forward pass", + "image": "" + }, + { + "text": "To recursively apply the chain rule along a computational graph to compute the gradients of all inputs/parameters/intermediates", + "image": "" + }, + { + "text": "To implement forward pass without saving any intermediates", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What are the two essential methods that nodes implement in a modularized backpropagation system?", + "answers": [ + { + "text": "Train() and Predict()", + "image": "" + }, + { + "text": "Input() and Output()", + "image": "" + }, + { + "text": "Loss() and Regularisation()", + "image": "" + }, + { + "text": "Forward() and Backward()", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: In a modularized backpropagation system, what is the purpose of the forward() method of a node?", + "answers": [ + { + "text": "To apply the chain rule", + "image": "" + }, + { + "text": "To compute gradients", + "image": "" + }, + { + "text": "To compute the result of an operation and save any intermediates needed for gradient computation.", + "image": "" + }, + { + "text": "To update parameters.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In a modularized backpropagation system, what is the purpose of the backward() method of a node?", + "answers": [ + { + "text": "To compute the result of an operation", + "image": "" + }, + { + "text": "To save intermediates", + "image": "" + }, + { + "text": "To update parameters", + "image": "" + }, + { + "text": "To apply the chain rule to compute the gradient of the loss function with respect to the inputs", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What is the primary difference between a linear classifier and a two-layer neural network?", + "answers": [ + { + "text": "A neural network does not include a linear score function.", + "image": "" + }, + { + "text": "A neural network has a simpler architecture.", + "image": "" + }, + { + "text": "A neural network is linear.", + "image": "" + }, + { + "text": "A neural network introduces a non-linear transformation with an activation function.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: According to the sources, what is the consequence of building a neural network without an activation function?", + "answers": [ + { + "text": "It becomes a more powerful non-linear classifier", + "image": "" + }, + { + "text": "It becomes 
computationally intractable", + "image": "" + }, + { + "text": "It ends up being a linear classifier", + "image": "" + }, + { + "text": "It cannot learn anything", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What does the Universal Approximation Theorem state in the context of Neural Networks?", + "answers": [ + { + "text": "Neural networks always find a global minimum", + "image": "" + }, + { + "text": "Neural networks always learn a linear function", + "image": "" + }, + { + "text": "A sufficiently large neural network can approximate any discontinuous function", + "image": "" + }, + { + "text": "A sufficiently large neural network can approximate any continuous function", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What does the source say about training of Multi-Layer Perceptrons (MLPs)?", + "answers": [ + { + "text": "It is convex.", + "image": "" + }, + { + "text": "It is generally easy.", + "image": "" + }, + { + "text": "It is always optimal.", + "image": "" + }, + { + "text": "It is highly non-convex, with multiple local minima.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What does the source state is a good default choice for an activation function?", + "answers": [ + { + "text": "Sigmoid", + "image": "" + }, + { + "text": "tanh", + "image": "" + }, + { + "text": "ReLU", + "image": "" + }, + { + "text": "ELU", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What does the source say about using the size of a neural network as a regularizer?", + "answers": [ + { + "text": "It is the best approach to regularize a network", + "image": "" + }, + { + "text": "It is better to use implicit regularization", + "image": "" + }, + { + "text": "It is always possible", + "image": "" + }, + { + "text": "It is not a good idea; stronger regularization methods are preferred", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What are the key factors contributing to the advancements in deep learning, according to the provided text?", + "answers": [ + { + "text": "Only massive parallel compute power", + "image": "" + }, + { + "text": "Only availability of large datasets", + "image": "" + }, + { + "text": "Only advances in machine learning over the years", + "image": "" + }, + { + "text": "Availability of large datasets, massive parallel compute power, and advances in machine learning", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: According to the sources, what is a limitation of hand-crafted features used in traditional machine learning?", + "answers": [ + { + "text": "They are very efficient to compute", + "image": "" + }, + { + "text": "They are very efficient to train", + "image": "" + }, + { + "text": "They are often task specific", + "image": "" + }, + { + "text": "They might be too general or too specific", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What is a key characteristic of features in deep learning, in contrast to traditional approaches?", + "answers": [ + { + "text": "They are fixed", + "image": "" + }, + { + "text": "They are hand-crafted", + "image": "" + }, + { + "text": "They are trainable (parameterized)", + "image": "" + }, + { + "text": "They are non-differentiable", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In 
deep learning, what does \"end-to-end\" training refer to?", + "answers": [ + { + "text": "The separation of feature extraction and classification", + "image": "" + }, + { + "text": "Hand-crafting of features", + "image": "" + }, + { + "text": "Training each layer of a network separately", + "image": "" + }, + { + "text": "The joint training of feature extraction and classification as a single pipeline", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What does the source emphasize about how complex systems in deep learning are built?", + "answers": [ + { + "text": "They use extremely complicated individual blocks", + "image": "" + }, + { + "text": "They are built by hand-crafting the individual components", + "image": "" + }, + { + "text": "They require an extraordinary amount of data", + "image": "" + }, + { + "text": "They are built via composition of simple building blocks", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest":"ConvNets: In the context of Convolutional Neural Networks, a fully connected layer applied to a 32x32x3 image involves stretching the image into a 3072x1 vector. What is the primary drawback of this approach?", + "answers": [ + { + "text": "It increases the computational complexity of the network.", + "image": "" + }, + { + "text": "It destroys the spatial structure of the image.", + "image": "" + }, + { + "text": "It requires more memory than convolutional layers.", + "image": "" + }, + { + "text": "It is difficult to implement.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: A convolutional layer processes a 32x32x3 image using a 5x5x3 filter. What is produced by sliding this single filter over the entire image?", + "answers": [ + { + "text": "A 32x32x1 activation map.", + "image": "" + }, + { + "text": "A 28x28x1 activation map.", + "image": "" + }, + { + "text": "A single number, representing a dot product of a 5x5x3 chunk of the input with the filter, plus bias", + "image": "" + }, + { + "text": "A 5x5x3 feature map.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: In a convolutional layer, if you have multiple filters, say six filters, what is the result of applying those filters to a single input image?", + "answers": [ + { + "text": "A single activation map with increased depth.", + "image": "" + }, + { + "text": "Six activation maps which are then stacked up to get a new image with a different depth.", + "image": "" + }, + { + "text": "Six activation maps that are averaged to form a single map.", + "image": "" + }, + { + "text": "Six separate images of the same spatial dimension as the input.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: What does it mean for a convolutional filter to \"slide\" over the image during the convolution operation?", + "answers": [ + { + "text": "The filter moves across the image, changing its weights at each position.", + "image": "" + }, + { + "text": "The filter is applied to different channels of the input volume sequentially.", + "image": "" + }, + { + "text": "The filter computes dot products with small overlapping patches of the image at each location, resulting in an activation map", + "image": "" + }, + { + "text": "The filter moves in a predetermined pattern, similar to a pooling operation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest":"ConvNets: What is a key difference between a convolutional layer and a
fully connected layer in terms of how they process spatial information?", + "answers": [ + { + "text": "A convolutional layer destroys spatial information, whereas a fully connected layer preserves it.", + "image": "" + }, + { + "text": "A fully connected layer performs dot products between an input vector and a row of weights, while a convolutional layer does not", + "image": "" + }, + { + "text": "A convolutional layer preserves the spatial structure of the input, whereas a fully connected layer stretches the input into a vector.", + "image": "" + }, + { + "text": "A fully connected layer uses filters, whereas convolutional layers do not.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest":"ConvNets: What is the consequence of having 6 separate 5x5 filters in a convolutional layer that acts on a 32x32x3 input?", + "answers": [ + { + "text": "A single 28x28x6 activation map will be obtained.", + "image": "" + }, + { + "text": "Six 28x28x1 activation maps are obtained which are then stacked to get a 28x28x6 \"new image\".", + "image": "" + }, + { + "text": "A 28x28x3 activation map is obtained.", + "image": "" + }, + { + "text": "Six different 32x32x3 images are obtained.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: What are the four hyperparameters that a convolutional layer needs?", + "answers": [ + { + "text": "Filter size, stride, number of pooling layers, and number of fully connected layers", + "image": "" + }, + { + "text": "Filter size, stride, padding, and number of pooling layers.", + "image": "" + }, + { + "text": "Number of filters, the filter size, the stride and the zero padding", + "image": "" + }, + { + "text": "Input size, filter size, stride, and number of output channels.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest":"ConvNets: What are the two primary functions of a pooling layer in a CNN?", + "answers": [ + { + "text": "It adds learnable parameters and introduces spatial variance.", + "image": "" + }, + { + "text": "It reduces the size of the representation and introduces spatial invariance.", + "image": "" + }, + { + "text": "It increases the depth of the feature maps and makes the network deeper", + "image": "" + }, + { + "text": "It adds non-linearity and performs non-linear combinations of features.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: How does a max pooling layer with 2x2 filters and a stride of 2 operate?", + "answers": [ + { + "text": "It averages the values in each 2x2 region.", + "image": "" + }, + { + "text": "It takes the maximum value in each 2x2 region.", + "image": "" + }, + { + "text": "It multiplies the values in each 2x2 region by a scalar", + "image": "" + }, + { + "text": "It applies a learnable function to each 2x2 region.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: What is a trend in recent CNN architectures regarding pooling and fully connected layers?", + "answers": [ + { + "text": "A trend towards more pooling layers and larger fully connected layers", + "image": "" + }, + { + "text": "A trend towards larger filters and wider architectures", + "image": "" + }, + { + "text": "A trend towards smaller filters and deeper architectures", + "image": "" + }, + { + "text": "A trend towards getting rid of pooling and fully connected layers (just CONV layers)", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest":"ConvNets: According to the source, what is a 
typical structure of CNN architectures historically, before recent advancements like ResNet/GoogLeNet challenged it?", + "answers": [ + { + "text": "A sequence of convolutional layers followed by a single fully connected layer", + "image": "" + }, + { + "text": "A sequence of pooling layers followed by a sequence of convolutional layers", + "image": "" + }, + { + "text": "A repeating pattern of (CONV-RELU)×N followed by an optional POOL, repeated M times, followed by (FC-RELU)×K, and a final SOFTMAX activation.", + "image": "" + }, + { + "text": "A sequence of fully connected layers followed by a sequence of pooling layers.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: During the training phase, how does dropout modify the activation of a neuron with a dropout rate of p?", + "answers": [ + { + "text": "The neuron's activation is scaled by a factor of p.", + "image": "" + }, + { + "text": "The neuron's activation is multiplied by 1-p.", + "image": "" + }, + { + "text": "The neuron's activation is set to zero with a probability of p, otherwise its activation is preserved.", + "image": "" + }, + { + "text": "The neuron's activation is always set to zero.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: During inference (testing or validation), what adjustment is typically made to neuron activations in a network that uses dropout?", + "answers": [ + { + "text": "No adjustments are made; neurons are used as they are.", + "image": "" + }, + { + "text": "Neuron activations are multiplied by the dropout rate p.", + "image": "" + }, + { + "text": "Neuron activations are scaled down by multiplying by (1-p).", + "image": "" + }, + { + "text": "Neuron activations are set to zero.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the purpose of scaling the activations by 1-p during inference when using dropout?", + "answers": [ + { + "text": "To increase the magnitude of neuron activations.", + "image": "" + }, + { + "text": "To compensate for the fact that fewer neurons were active during training.", + "image": "" + }, + { + "text": "To introduce more randomness during the inference phase.", + "image": "" + }, + { + "text": "To ensure the network learns different features during inference.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: In the \"inverse dropout\" formulation, when is the scaling applied to the activations, and what is the key benefit?", + "answers": [ + { + "text": "Scaling is applied during inference, ensuring faster inference.", + "image": "" + }, + { + "text": "Scaling is applied before training, resulting in faster convergence.", + "image": "" + }, + { + "text": "Scaling is applied during training, ensuring the expected value of the activations remains consistent between training and inference.", + "image": "" + }, + { + "text": "Scaling is applied after the backpropagation, for better generalization.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the \"vanishing gradient\" problem in deep neural networks?", + "answers": [ + { + "text": "A situation where gradients become very large, causing instability in training.", + "image": "" + }, + { + "text": "The tendency of neurons to deactivate randomly during training.", + "image": "" + }, + { + "text": "A phenomenon where gradients become increasingly small as they propagate backward, making training difficult.", + "image": "" + }, + { + "text":
"A problem that only occurs in shallow networks.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the main purpose of residual connections (skip connections) in ResNets?", + "answers": [ + { + "text": "To reduce the number of layers needed in a network.", + "image": "" + }, + { + "text": "To add more non-linearity to the network.", + "image": "" + }, + { + "text": "To address the vanishing gradient problem by allowing gradients to flow more easily through the network.", + "image": "" + }, + { + "text": "To speed up training by reducing the number of computations.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: How does a residual connection work mathematically?", + "answers": [ + { + "text": "It replaces the layer's output with the original input.", + "image": "" + }, + { + "text": "It multiplies the layer's output by the input.", + "image": "" + }, + { + "text": "It adds the layer's input directly to its output h = F(x) + x.", + "image": "" + }, + { + "text": "It subtracts the input from the layer's output.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What happens if the parameters in a residual unit are set such that F(x) = 0?", + "answers": [ + { + "text": "The residual unit outputs a zero vector.", + "image": "" + }, + { + "text": "The residual unit's output becomes exponentially large.", + "image": "" + }, + { + "text": "The residual unit passes the input x through unmodified.", + "image": "" + }, + { + "text": "The unit passes through a zero vector.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the key advantage of residual connections in backpropagation?", + "answers": [ + { + "text": "It simplifies the backpropagation process.", + "image": "" + }, + { + "text": "It ensures that the gradients vanish more quickly.", + "image": "" + }, + { + "text": "It means the derivatives don't vanish as ∇ₓ h = ∇ₓ (F(x) + x) = ∂F / ∂x + I.", + "image": "" + }, + { + "text": "It forces the network to learn different features for every layer.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What was the impact of Residual Networks (ResNets) on image classification performance on ImageNet?", + "answers": [ + { + "text": "ResNets reduced the performance of image classification.", + "image": "" + }, + { + "text": "ResNets achieved similar results to previous state-of-the-art models.", + "image": "" + }, + { + "text": "ResNets achieved significantly lower error rates than previous models and even human performance, with a 152-layer ResNet achieving 4.49% top-5 error.", + "image": "" + }, + { + "text": "ResNets could only be trained with a limited number of layers", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What does \"standard scaling\" aim to achieve when preprocessing data?", + "answers": [ + { + "text": "It ensures that each feature has a different mean.", + "image": "" + }, + { + "text": "It ensures each feature (column) has zero mean and unit variance.", + "image": "" + }, + { + "text": "It scales the data between 0 and 1.", + "image": "" + }, + { + "text": "It increases the variance of each feature.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: In standard scaling, what do μ and σ² represent?", + "answers": [ + { + "text": "μ is the sum and σ² is the variance of each feature/column.", + "image": "" + }, + { + "text": "μ is the 
median and σ² is the standard deviation of each feature/column.", + "image": "" + }, + { + "text": "μ is the mean and σ² is the variance of each feature/column.", + "image": "" + }, + { + "text": "μ is the mean and σ² is the standard deviation of each feature/column.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the main goal of Batch Normalization (BN) in neural networks?", + "answers": [ + { + "text": "To normalize the input data before training.", + "image": "" + }, + { + "text": "To learn an optimal mean and variance for each unit of the network's layers during training.", + "image": "" + }, + { + "text": "To reduce the number of parameters in the network.", + "image": "" + }, + { + "text": "To simplify backpropagation.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: How does Batch Normalization (BN) approximate the mean and variance?", + "answers": [ + { + "text": "By calculating statistics over the entire dataset during each training step.", + "image": "" + }, + { + "text": "By using the data in a mini-batch.", + "image": "" + }, + { + "text": "By using a pre-defined set of values.", + "image": "" + }, + { + "text": "By using the moving average of the previous layer.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: During BN training, how is the output H standardized, and what does ε do?", + "answers": [ + { + "text": "H is standardized by subtracting its mean, and ε adds a small value to prevent division by zero.", + "image": "" + }, + { + "text": "H is standardized by dividing by its variance, and ε increases the variance.", + "image": "" + }, + { + "text": "H is standardized by multiplying by its standard deviation, and ε reduces the mean.", + "image": "" + }, + { + "text": "H is standardized by adding its mean, and ε adjusts the variance.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "MoreNN: During BN training, what trainable parameters are introduced to set a new mean and variance for each column j?", + "answers": [ + { + "text": "Two scalars, α and β.", + "image": "" + }, + { + "text": "Two vectors, μ and σ.", + "image": "" + }, + { + "text": "Two values per column, αj and βj.", + "image": "" + }, + { + "text": "Two matrices, W and b.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What are two common solutions used during inference to avoid the output depending on the mini-batch when using BN?", + "answers": [ + { + "text": "Training with larger batch sizes and adjusting learning rates.", + "image": "" + }, + { + "text": "Applying a different set of trainable parameters and using dropout.", + "image": "" + }, + { + "text": "Post-training statistics calculation and moving average of statistics.", + "image": "" + }, + { + "text": "Re-training with the entire dataset and applying a different activation function.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: How does Batch Normalization (BN) work with convolutional outputs that have dimensions (b, h, w, c)?", + "answers": [ + { + "text": "It normalizes across all dimensions, including the batch size.", + "image": "" + }, + { + "text": "The mean and variance are computed per channel, normalizing independently across spatial dimensions and batch.", + "image": "" + }, + { + "text": "It normalizes only across the batch size.", + "image": "" + }, + { + "text": "It normalizes across spatial
dimensions only.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is a limitation of Batch Normalization when using small batch sizes?", + "answers": [ + { + "text": "Batch Normalization becomes more accurate with small batch sizes.", + "image": "" + }, + { + "text": "The variance in the computed mean and variance estimates can become excessively high, leading to unstable training.", + "image": "" + }, + { + "text": "It makes the network simpler to train.", + "image": "" + }, + { + "text": "It decreases the computational overhead.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: How does Layer Normalization differ from Batch Normalization?", + "answers": [ + { + "text": "Layer Normalization normalizes across the batch, while Batch Normalization normalizes across features.", + "image": "" + }, + { + "text": "Layer Normalization normalizes across features, while Batch Normalization normalizes across the mini-batch.", + "image": "" + }, + { + "text": "Layer Normalization introduces learnable parameters, whereas Batch Normalization does not.", + "image": "" + }, + { + "text": "Layer Normalization does not require the calculation of mean and variance.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: In what type of networks is Layer Normalization commonly used?", + "answers": [ + { + "text": "CNNs for image classification.", + "image": "" + }, + { + "text": "Forecasting neural networks working with time series and transformers.", + "image": "" + }, + { + "text": "Generative adversarial networks (GANs).", + "image": "" + }, + { + "text": "Recurrent neural networks (RNNs) for language modeling.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is the primary goal of data augmentation?", + "answers": [ + { + "text": "To reduce the size of the training dataset.", + "image": "" + }, + { + "text": "To decrease the complexity of the training data.", + "image": "" + }, + { + "text": "To increase the size of the training dataset effectively by applying random transformations.", + "image": "" + }, + { + "text": "To make the training process faster by using simpler examples.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What are the typical transformations used in data augmentation?", + "answers": [ + { + "text": "Only geometric transformations like flipping, cropping, and rotating.", + "image": "" + }, + { + "text": "Only color and lighting adjustments like brightness, contrast and saturation.", + "image": "" + }, + { + "text": "A combination of geometric transformations, color and lighting adjustments, noise and distortion, cutout/masking, and combination techniques.", + "image": "" + }, + { + "text": "Only noise addition", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the main purpose of using 1D convolutions for time-series data?", + "answers": [ + { + "text": "To extract global features or patterns that evolve over time.", + "image": "" + }, + { + "text": "To extract local features or patterns that evolve over time.", + "image": "" + }, + { + "text": "To make the time-series data stationary.", + "image": "" + }, + { + "text": "To increase the dimensionality of the input.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: How does a 1D convolution capture local dependencies in time series data?", + "answers": [ + { + "text": "By averaging all 
time steps.", + "image": "" + }, + { + "text": "By applying a filter of a fixed size that slides over the time series, detecting trends or repeated patterns.", + "image": "" + }, + { + "text": "By only considering the first and last time steps.", + "image": "" + }, + { + "text": "By considering the entire sequence at once.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What does it mean for a 1D convolution to have parameter sharing?", + "answers": [ + { + "text": "The parameters change over time, allowing to learn specific behaviour for certain time-steps.", + "image": "" + }, + { + "text": "The same filter is applied across all time steps, reducing the number of parameters and improving generalization.", + "image": "" + }, + { + "text": "Different filters are applied to different time steps.", + "image": "" + }, + { + "text": "The parameters are only used for a specific subset of the input.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What does translation invariance mean in the context of 1D convolutions?", + "answers": [ + { + "text": "The model is sensitive to shifts in the time domain.", + "image": "" + }, + { + "text": "The model does not consider the time order of the input.", + "image": "" + }, + { + "text": "It helps in identifying features that are present at different time steps, making it robust to shifts in the time domain.", + "image": "" + }, + { + "text": "It means the model can only detect patterns at a fixed time step.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the key characteristic of a causal convolution?", + "answers": [ + { + "text": "The output at each time step depends on future time steps.", + "image": "" + }, + { + "text": "The output at each time step depends only on the current and previous time steps.", + "image": "" + }, + { + "text": "The output at each time step is independent of other time steps.", + "image": "" + }, + { + "text": "The output at each time step is influenced by future and past time steps", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: In the context of time series forecasting with a causal model, what is one way to train a model?", + "answers": [ + { + "text": "Pool the output representation H over all time steps and apply a regressor head to predict xn.", + "image": "" + }, + { + "text": "Use a non-causal model to train.", + "image": "" + }, + { + "text": "Pool only the first few steps in the time series.", + "image": "" + }, + { + "text": "Disregard the time dependencies between the series.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "MoreNN: What is the difference between how 1D convolution and self-attention model sequences?", + "answers": [ + { + "text": "1D convolution captures global dependencies, while self-attention captures local dependencies.", + "image": "" + }, + { + "text": "1D convolution has quadratic complexity, while self-attention is more efficient.", + "image": "" + }, + { + "text": "1D convolution captures local patterns using a sliding filter, while self-attention computes interactions between all elements.", + "image": "" + }, + { + "text": "1D convolution uses weights for each pair of inputs, while self-attention shares weights.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: In self-attention mechanisms, what are the three transformed vectors derived from each token's embedding?", + 
"answers": [ + { + "text": "Input, output, and hidden vectors.", + "image": "" + }, + { + "text": "Weight, bias, and activation vectors.", + "image": "" + }, + { + "text": "Query, Key, and Value vectors.", + "image": "" + }, + { + "text": "Gradient, loss, and prediction vectors.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: In the analogy with a web search, what corresponds to the \"Query\" vector in self-attention?", + "answers": [ + { + "text": "The titles of web pages.", + "image": "" + }, + { + "text": "The content of web pages.", + "image": "" + }, + { + "text": "The search term you type—what you're looking for.", + "image": "" + }, + { + "text": "The search engine algorithm.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the role of \"masking\" in masked self-attention?", + "answers": [ + { + "text": "To amplify the attention scores of future tokens.", + "image": "" + }, + { + "text": "To ensure the model only focuses on past tokens when predicting the next token.", + "image": "" + }, + { + "text": "To randomize the attention scores to avoid bias.", + "image": "" + }, + { + "text": "To ignore the past tokens and focus only on future tokens.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is the purpose of passing the concatenated outputs of multiple self-attention heads through a Multilayer Perceptron (MLP) layer?", + "answers": [ + { + "text": "To reduce the dimensionality of the output.", + "image": "" + }, + { + "text": "To enhance the model's representational capacity after capturing diverse relationships.", + "image": "" + }, + { + "text": "To compute attention scores.", + "image": "" + }, + { + "text": "To apply positional embeddings.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is the \"Add\" operation in a Transformer block?", + "answers": [ + { + "text": "A fully connected layer", + "image": "" + }, + { + "text": "A pooling layer", + "image": "" + }, + { + "text": "A residual connection", + "image": "" + }, + { + "text": "A layer normalization", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the \"Norm\" operation in a Transformer block?", + "answers": [ + { + "text": "A fully connected layer.", + "image": "" + }, + { + "text": "A residual connection", + "image": "" + }, + { + "text": "A Batch Normalization", + "image": "" + }, + { + "text": "A Layer Normalization", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "MoreNN: What is the first step in representing text as input for a transformer?", + "answers": [ + { + "text": "Applying a softmax function to the input text.", + "image": "" + }, + { + "text": "Dividing text into tokens and converting them into numerical vectors called embeddings.", + "image": "" + }, + { + "text": "Normalizing the text using a standard scaler.", + "image": "" + }, + { + "text": "Applying data augmentation techniques to the text.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What are the main issues with word encoders?", + "answers": [ + { + "text": "They are difficult to train and implement.", + "image": "" + }, + { + "text": "They require huge computational power to represent words.", + "image": "" + }, + { + "text": "They need to detect boundaries of words and treat different forms of the same word as separate types.", + "image": "" + }, + { + "text": "They don't capture the 
semantic meaning of the words.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What are the characteristics of character encoders?", + "answers": [ + { + "text": "They increase the complexity of the model and are easy to use.", + "image": "" + }, + { + "text": "They reduce the complexity but are almost impossible to use.", + "image": "" + }, + { + "text": "They make the model more robust and efficient.", + "image": "" + }, + { + "text": "They are ideal for most NLP tasks.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is the byte pair encoding (BPE) algorithm used for?", + "answers": [ + { + "text": "To represent each word as a single byte.", + "image": "" + }, + { + "text": "To represent each character in a text as an integer.", + "image": "" + }, + { + "text": "To create subword tokens by merging frequent character sequences.", + "image": "" + }, + { + "text": "To compress text data into smaller files.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the first step in the BPE algorithm?", + "answers": [ + { + "text": "Count the frequency of each character pair in the data", + "image": "" + }, + { + "text": "Merge the characters into one symbol.", + "image": "" + }, + { + "text": "Form a base vocabulary of all characters that occur in the training data.", + "image": "" + }, + { + "text": "Tokenize the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the purpose of positional encodings in Transformer models?", + "answers": [ + { + "text": "To reduce the dimensionality of the input.", + "image": "" + }, + { + "text": "To inject order into the model by embedding position-specific information.", + "image": "" + }, + { + "text": "To increase the variance of the input data.", + "image": "" + }, + { + "text": "To prevent the model from overfitting.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: Why are positional encodings needed in transformers?", + "answers": [ + { + "text": "Transformers do not need positional encodings because they can infer the order of the input.", + "image": "" + }, + { + "text": "Because Transformers process all tokens simultaneously, they need positional encodings to be aware of the sequence information.", + "image": "" + }, + { + "text": "Positional encodings are only needed for time-series data.", + "image": "" + }, + { + "text": "Because Transformers can easily capture the order of the sequence.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: How do relative positional embeddings work?", + "answers": [ + { + "text": "They add information about the absolute position of the tokens.", + "image": "" + }, + { + "text": "They only use static positional information.", + "image": "" + }, + { + "text": "They consider the relative distance between tokens instead of their absolute positions.", + "image": "" + }, + { + "text": "They only encode the first and last positions of the tokens.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What are the outputs of the transformer block and what are they used for?", + "answers": [ + { + "text": "Logits, which are converted into probabilities of the next token, are used to select the most probable token or sample one.", + "image": "" + }, + { + "text": "Embeddings, which are used for classification tasks.", + "image": "" + }, + { + "text": "Attention scores, which are used 
for image generation.", + "image": "" + }, + { + "text": "Key and value matrices, used for backpropagation", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "MoreNN: What is the role of the \"temperature\" hyperparameter when generating text from a language model?", + "answers": [ + { + "text": "The \"temperature\" is used to adjust the learning rate of the model", + "image": "" + }, + { + "text": "It is used to adjust the size of the model", + "image": "" + }, + { + "text": "It is used to control the size of the vocabulary used by the model", + "image": "" + }, + { + "text": "It controls the randomness of the output by sharpening or softening the probability distribution.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "LR: What is the primary goal of the cost function in linear regression?", + "answers": [ + { + "text": "To maximize the difference between predicted and actual output values.", + "image": "" + }, + { + "text": "To identify the optimal number of features for a model.", + "image": "" + }, + { + "text": "To minimize the error between the predicted values and the actual target values.", + "image": "" + }, + { + "text": "To determine the correlation between input and output variables.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: In the context of gradient descent, which statement accurately describes the effect of the learning rate (α)?", + "answers": [ + { + "text": "A larger learning rate guarantees faster convergence to the global minimum.", + "image": "" + }, + { + "text": "A smaller learning rate might cause the gradient descent to diverge from the minimum.", + "image": "" + }, + { + "text": "If α is too small, gradient descent will be slow, and if α is too large, gradient descent might overshoot the minimum and even diverge.", + "image": "" + }, + { + "text": "The learning rate should automatically decrease over time as gradient descent approaches a local minimum.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: What is the key distinction between \"batch\" gradient descent and stochastic gradient descent (SGD)?", + "answers": [ + { + "text": "Batch gradient descent is an online method, while SGD is an offline method.", + "image": "" + }, + { + "text": "Batch gradient descent updates parameters after each training example, whereas SGD does it using all training examples.", + "image": "" + }, + { + "text": "Batch gradient descent calculates the gradient using all training examples in each iteration, while SGD uses only a single training example to update the gradient in each iteration.", + "image": "" + }, + { + "text": "SGD is slower and requires more iterations than batch gradient descent.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: Which of the following is NOT a method to calculate simple linear regression?", + "answers": [ + { + "text": "Gradient Descent", + "image": "" + }, + { + "text": "Normal equation", + "image": "" + }, + { + "text": "Principal Component Analysis", + "image": "" + }, + { + "text": "Software packages, e.g., NumPy polyfit", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: What is the purpose of feature scaling in linear regression, and how is mean normalization typically applied?", + "answers": [ + { + "text": "To increase the magnitude of the features and make the gradient descent faster.", + "image": "" + }, + { + "text": "Feature scaling ensures that the features 
are on a similar scale and mean normalization replaces the feature value with xᵢ − μᵢ, to have approximately zero mean.", + "image": "" + }, + { + "text": "To add random noise to the features in order to prevent overfitting.", + "image": "" + }, + { + "text": "Feature scaling is not required if we are using the normal equation.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: What does the normal equation provide in the context of linear regression?", + "answers": [ + { + "text": "An iterative approach to find the parameters that minimise the cost function.", + "image": "" + }, + { + "text": "A direct analytical method to compute the parameters (Θ) that minimise the cost function.", + "image": "" + }, + { + "text": "An alternative to gradient descent that is faster regardless of the number of features.", + "image": "" + }, + { + "text": "A way to determine the appropriate learning rate for gradient descent.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: When might the normal equation be computationally inefficient and what might be a workaround?", + "answers": [ + { + "text": "When the number of training examples is very high, one should use gradient descent instead", + "image": "" + }, + { + "text": "When the number of features is very high, and as a work-around, delete some features or use regularization", + "image": "" + }, + { + "text": "The normal equation is always computationally efficient, irrespective of the number of features", + "image": "" + }, + { + "text": "It is inefficient when there is correlation; therefore it requires an alternative method", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: According to the sources, what does a Pearson correlation coefficient (r) of -1 signify?", + "answers": [ + { + "text": "No correlation between the variables", + "image": "" + }, + { + "text": "A moderate positive correlation between the variables", + "image": "" + }, + { + "text": "A maximum negative correlation between the variables", + "image": "" + }, + { + "text": "A maximum positive correlation between the variables", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: What does the coefficient of determination (R²) measure in regression analysis?", + "answers": [ + { + "text": "The correlation between variables.", + "image": "" + }, + { + "text": "The goodness-of-fit of a line or curve to the data points.", + "image": "" + }, + { + "text": "The slope of the regression line.", + "image": "" + }, + { + "text": "The complexity of the regression model.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: What is a key characteristic that distinguishes locally-weighted regression from linear regression?", + "answers": [ + { + "text": "Locally-weighted regression uses a fixed set of parameters", + "image": "" + }, + { + "text": "Locally-weighted regression parameters grow with the data, making it a non-parametric learning algorithm, whilst linear regression uses a fixed set of parameters.", + "image": "" + }, + { + "text": "Locally-weighted regression is faster than linear regression.", + "image": "" + }, + { + "text": "Locally weighted regression uses gradient descent.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: According to the sources, what is the primary assumption underlying the probabilistic interpretation of least squares?", + "answers": [ + { + "text": "That the target value y is equal to 
Θᵀx plus some random error.", + "image": "" + }, + { + "text": "That the input features are normally distributed.", + "image": "" + }, + { + "text": "That the parameters Θ are fixed and known.", + "image": "" + }, + { + "text": "That the regression line has zero error.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "LR2: In the context of the normal equation, what does the term 'X' represent?", + "answers": [ + { + "text": "The vector of predicted values.", + "image": "" + }, + { + "text": "The matrix of target variables.", + "image": "" + }, + { + "text": "The matrix of input features.", + "image": "" + }, + { + "text": "The vector of errors.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: What does θ represent in the normal equation?", + "answers": [ + { + "text": "The error term", + "image": "" + }, + { + "text": "The predicted values", + "image": "" + }, + { + "text": "The parameter vector that we aim to find", + "image": "" + }, + { + "text": "The input features", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: Given the cost function J(θ) = 1/2 * ||Xθ − y||², which statement correctly describes how the normal equation is derived?", + "answers": [ + { + "text": "The gradient of J(θ) is set to a non-zero constant to minimise the cost function.", + "image": "" + }, + { + "text": "The cost function is directly minimised by setting its partial derivative to the identity matrix.", + "image": "" + }, + { + "text": "The gradient of J(θ) is set to zero to find the optimal parameter vector.", + "image": "" + }, + { + "text": "The cost function is minimised by setting the second derivative to zero.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: The normal equation solution θ = (XᵀX)⁻¹Xᵀy can be computed when:", + "answers": [ + { + "text": "XᵀX is a singular matrix.", + "image": "" + }, + { + "text": "X has more rows than columns", + "image": "" + }, + { + "text": "XᵀX is invertible", + "image": "" + }, + { + "text": "X has linearly dependent columns", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: What is the rank condition for XᵀX to be invertible?", + "answers": [ + { + "text": "X must have a rank equal to the number of rows.", + "image": "" + }, + { + "text": "X must be a symmetric matrix.", + "image": "" + }, + { + "text": "X must have linearly independent columns, i.e. rank equal to the number of features (m).", + "image": "" + }, + { + "text": "X must be a square matrix with a determinant of 1.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: In the equation for θ, θ = (XᵀX)⁻¹Xᵀy what does the (XᵀX)⁻¹ term represent?", + "answers": [ + { + "text": "The pseudo-inverse of the feature matrix", + "image": "" + }, + { + "text": "The transpose of the feature matrix", + "image": "" + }, + { + "text": "The inverse of the matrix product X transpose times X.", + "image": "" + }, + { + "text": "The dot product of X with itself.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: If XᵀX is not invertible, what can be inferred about the feature matrix X?", + "answers": [ + { + "text": "The feature matrix is not real", + "image": "" + }, + { + "text": "The feature matrix contains all zeros", + "image": "" + }, + { + "text": "The feature matrix has linearly dependent columns", + "image": "" + }, + { + "text": "The feature matrix contains no features", + "image": "" + } + ], + "correct": 2, + 
"image": "" + }, + { + "quest": "LR2: What is a key difference between linear regression and locally weighted regression (LWR)?", + "answers": [ + { + "text": "Linear regression uses a kernel function, while LWR does not.", + "image": "" + }, + { + "text": "LWR assigns weights to data points based on their proximity to the query point, while linear regression does not.", + "image": "" + }, + { + "text": "Linear regression uses a constant for error calculation while LWR does not.", + "image": "" + }, + { + "text": "LWR computes global parameters, while linear regression computes local parameters.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR2: In Locally Weighted Regression (LWR), what is the purpose of the kernel function?", + "answers": [ + { + "text": "To perform a linear transformation of the input data.", + "image": "" + }, + { + "text": "To reduce the dimensionality of the feature matrix.", + "image": "" + }, + { + "text": "To give higher weights to points closer to the query point and lower weights to points farther away.", + "image": "" + }, + { + "text": "To transform all data into a standard normal distribution.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: What does the term 'τ' (tau) represent in the context of the weighting function for Locally Weighted Regression (LWR)?", + "answers": [ + { + "text": "The weighting parameter", + "image": "" + }, + { + "text": "The inverse of the feature matrix", + "image": "" + }, + { + "text": "The variance of the data", + "image": "" + }, + { + "text": "The bandwidth parameter, controlling the width of the kernel.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "LR2: What is an advantage of locally weighted regression compared to standard linear regression?", + "answers": [ + { + "text": "LWR is always faster to compute than linear regression.", + "image": "" + }, + { + "text": "LWR is not affected by outliers.", + "image": "" + }, + { + "text": "LWR can model non-linear relationships between the input features and the target variable.", + "image": "" + }, + { + "text": "LWR always has a unique solution and no risk of overfitting.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: Based on the diagram in the source, which of the following can be described as 'overfitting'?", + "answers": [ + { + "text": "The model underfits the data.", + "image": "" + }, + { + "text": "The model perfectly fits all data points including the noise in the data.", + "image": "" + }, + { + "text": "The model has very high flexibility and thus it captures the random variations and noise in the training data, not generalising well to unseen data.", + "image": "" + }, + { + "text": "The model gives a completely inaccurate fit to the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: Given a binary classification scenario, which of the following is the correct interpretation of the notation 'y = 1'?", + "answers": [ + { + "text": "It represents a negative outcome", + "image": "" + }, + { + "text": "It represents an positive outcome", + "image": "" + }, + { + "text": "It indicates an unknown outcome", + "image": "" + }, + { + "text": "It symbolizes the probability of an event", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: In the context of logistic regression, what does the notation 'p(y|x; θ)' represent?", + "answers": [ + { + "text": "The probability of 
observing feature 'x' given the parameters 'θ'.", + "image": "" + }, + { + "text": "The probability of the parameters 'θ' given the label 'y' and the feature 'x'.", + "image": "" + }, + { + "text": "The probability of label 'y' given feature 'x' and the parameters 'θ' .", + "image": "" + }, + { + "text": "The likelihood of feature 'x' being present", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: Based on the notes, what is the primary purpose of the sigmoid function, denoted as 'g(z)'?", + "answers": [ + { + "text": "To directly predict the class label.", + "image": "" + }, + { + "text": "To map the output of a linear combination of features to a probability between 0 and 1.", + "image": "" + }, + { + "text": "To calculate the error in the classification.", + "image": "" + }, + { + "text": "To optimize the parameters 'θ'", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: According to the notes, how is the decision boundary determined in the context of binary classification using a linear model?", + "answers": [ + { + "text": "By maximizing the probability p(y|x;θ)", + "image": "" + }, + { + "text": "By setting the sigmoid function g(z) to 0", + "image": "" + }, + { + "text": "By finding the line where g(z) = 0.5 which occurs when θ₀ + θ₁x₁ + θ₂x₂ = 0", + "image": "" + }, + { + "text": "By minimizing the cost function J(θ)", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: In the provided material, what does the notation 'J(θ)' represent?", + "answers": [ + { + "text": "The probability of observing the features given the parameters", + "image": "" + }, + { + "text": "The cost function used to evaluate the model's performance", + "image": "" + }, + { + "text": "The gradient of the model's parameters", + "image": "" + }, + { + "text": "The model's prediction", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: What is the primary goal of the optimization process with respect to J(θ)?", + "answers": [ + { + "text": "To maximize J(θ)", + "image": "" + }, + { + "text": "To calculate the Hessian matrix", + "image": "" + }, + { + "text": "To minimize J(θ)", + "image": "" + }, + { + "text": "To find the gradient of J(θ)", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: According to the notes, what is the significance of the term 'yi' in the cost function J(θ)?", + "answers": [ + { + "text": "It represents the predicted label", + "image": "" + }, + { + "text": "It is a feature value", + "image": "" + }, + { + "text": "It is the learning rate", + "image": "" + }, + { + "text": "It represents the true label", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "BinaryClass: Which of the following best describes the update rule for θ using gradient descent, as per the source?", + "answers": [ + { + "text": "θ = θ + α∇J(θ)", + "image": "" + }, + { + "text": "θ = θ - α∇J(θ)", + "image": "" + }, + { + "text": "θ = α∇J(θ)", + "image": "" + }, + { + "text": "θ = ∇J(θ)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: In the context of the provided notes, what is the purpose of the expression (sigmoid(z(i)) − y(i)) in the gradient calculation?", + "answers": [ + { + "text": "To calculate the total number of training examples", + "image": "" + }, + { + "text": "To represent the regularization term", + "image": "" + }, + { + "text": "To measure the difference 
between the predicted probability and the true label", + "image": "" + }, + { + "text": "To compute the Hessian matrix", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: According to the provided material, what does the Newton-Raphson method aim to accomplish?", + "answers": [ + { + "text": "It uses the gradient to reach an optimum.", + "image": "" + }, + { + "text": "It finds the minimum by directly inverting the Hessian", + "image": "" + }, + { + "text": "It is used to find the roots of a function by updating the parameters using the Hessian matrix", + "image": "" + }, + { + "text": "It is used to calculate the gradient of the cost function", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: What does the term H in the update equation θ := θ − H⁻¹∇J(θ) refer to (Newton method)?", + "answers": [ + { + "text": "The gradient of the cost function", + "image": "" + }, + { + "text": "The learning rate", + "image": "" + }, + { + "text": "The Hessian matrix of the cost function", + "image": "" + }, + { + "text": "The sigmoid function", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: What is a key drawback mentioned in the notes regarding the Newton-Raphson method?", + "answers": [ + { + "text": "It converges very slowly.", + "image": "" + }, + { + "text": "It always finds the global minimum.", + "image": "" + }, + { + "text": "It requires the calculation of the inverse of the Hessian matrix, which is expensive to compute", + "image": "" + }, + { + "text": "It only works for linear models.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: According to the material, what algorithm is suggested as a practical alternative to Newton's method?", + "answers": [ + { + "text": "Stochastic gradient descent.", + "image": "" + }, + { + "text": "BFGS (Broyden-Fletcher-Goldfarb-Shanno algorithm)", + "image": "" + }, + { + "text": "Linear Regression.", + "image": "" + }, + { + "text": "The conjugate gradient method.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: What is the relationship between the gradient and the direction of steepest ascent of a function J(θ)?", + "answers": [ + { + "text": "The gradient points in the direction of the steepest decrease", + "image": "" + }, + { + "text": "The gradient points in the direction of the steepest increase", + "image": "" + }, + { + "text": "The gradient is orthogonal to the direction of the steepest ascent", + "image": "" + }, + { + "text": "The gradient provides no information about the direction of the steepest ascent", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: According to the source, what does \"Log Loss\" or \"Negative Log Likelihood\" (NLL) measure in the context of classification?", + "answers": [ + { + "text": "The accuracy of the model’s predictions", + "image": "" + }, + { + "text": "The sum of squared errors.", + "image": "" + }, + { + "text": "The difference between predicted probabilities and true labels", + "image": "" + }, + { + "text": "The margin of separation between classes.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass2: What does the term 'logits' refer to in the document?", + "answers": [ + { + "text": "The output of the Softmax function.", + "image": "" + }, + { + "text": "The predicted probabilities of each class", + "image": 
"" + }, + { + "text": "The raw, unnormalized scores that are input to the Softmax", + "image": "" + }, + { + "text": "The loss calculated during backpropagation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass2: According to the source, what is the effect of the exponential function within Softmax?", + "answers": [ + { + "text": "To normalize values between 0 and 1.", + "image": "" + }, + { + "text": "To produce a weighted average of the inputs.", + "image": "" + }, + { + "text": "To ensure that all scores are positive", + "image": "" + }, + { + "text": "To amplify the differences between the raw scores", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "BinaryClass2: In the context of the provided document, what is the significance of the term \"cross-entropy\"?", + "answers": [ + { + "text": "It measures the complexity of the model.", + "image": "" + }, + { + "text": "It measures the average number of bits needed to encode data.", + "image": "" + }, + { + "text": "It measures the difference between probability distributions", + "image": "" + }, + { + "text": "It quantifies the uncertainty of predictions for multiple classes", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "BinaryClass2: What does the diagram with the red, green and blue points with decision boundaries represent?", + "answers": [ + { + "text": "The training of a linear regression model.", + "image": "" + }, + { + "text": "The concept of bias, variance and underfitting and overfitting.", + "image": "" + }, + { + "text": "The function of gradient descent", + "image": "" + }, + { + "text": "A graphical representation of the Softmax function", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: Based on the document, what is the effect of \"overfitting\" on the model?", + "answers": [ + { + "text": "The model generalizes well to new, unseen data.", + "image": "" + }, + { + "text": "The model memorizes the training data instead of learning the underlying patterns", + "image": "" + }, + { + "text": "The model is too simple to capture the complexity of the data.", + "image": "" + }, + { + "text": "The model has a high bias.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: What does 'bias' in the context of model training refer to in the document?", + "answers": [ + { + "text": "The model's tendency to consistently make incorrect assumptions", + "image": "" + }, + { + "text": "The variability in the model’s predictions.", + "image": "" + }, + { + "text": "The amount of training data used.", + "image": "" + }, + { + "text": "The complexity of the model architecture.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "BinaryClass2: According to the document, what does 'variance' in the context of model training refer to?", + "answers": [ + { + "text": "The model's ability to make consistent predictions.", + "image": "" + }, + { + "text": "The model’s sensitivity to changes in the training data.", + "image": "" + }, + { + "text": "The model's tendency to make consistent errors.", + "image": "" + }, + { + "text": "The bias in the model's assumptions.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: What does the source suggest about the relationship between model complexity and bias and variance?", + "answers": [ + { + "text": "Increasing model complexity always reduces bias and variance.", + "image": "" + 
}, + { + "text": "Increasing model complexity may reduce bias but increase variance.", + "image": "" + }, + { + "text": "Decreasing model complexity always reduces both bias and variance.", + "image": "" + }, + { + "text": "Bias and variance are not affected by the model complexity.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: What do the dashed lines in the model representation diagrams indicate?", + "answers": [ + { + "text": "Hyperplanes that separate the classes.", + "image": "" + }, + { + "text": "The decision boundaries of an overfitted model.", + "image": "" + }, + { + "text": "The margin of separation between classes.", + "image": "" + }, + { + "text": "Areas of underfitting.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "BinaryClass2: What does the concept of 'Expected Error' (E(Err)) in the context of the document represent?", + "answers": [ + { + "text": "The bias of the model.", + "image": "" + }, + { + "text": "The variance of the model.", + "image": "" + }, + { + "text": "The sum of Bias and Variance.", + "image": "" + }, + { + "text": "The irreducible error of the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BiasVariance: In the context of bias-variance tradeoff, what does \"high variance\" typically indicate about a machine learning model?", + "answers": [ + { + "text": "The model is too simple and underfits the training data.", + "image": "" + }, + { + "text": "The model is too complex and overfits the training data.", + "image": "" + }, + { + "text": "The model has a strong bias towards a specific class.", + "image": "" + }, + { + "text": "The model has low statistical efficiency.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BiasVariance: What is the primary purpose of regularization in machine learning?", + "answers": [ + { + "text": "To increase the model's complexity and reduce bias.", + "image": "" + }, + { + "text": "To reduce the model's complexity and prevent overfitting.", + "image": "" + }, + { + "text": "To improve the model's performance on the training data.", + "image": "" + }, + { + "text": "To increase the model's variance.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BiasVariance: Why is choosing hyperparameters based solely on the training data considered a bad practice?", + "answers": [ + { + "text": "It can lead to a decrease in the model's variance.", + "image": "" + }, + { + "text": "It does not provide information about how the algorithm will perform on new, unseen data.", + "image": "" + }, + { + "text": "It will always choose the least complex model.", + "image": "" + }, + { + "text": "It would always lead to choosing the most complex model.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BiasVariance: What is the primary advantage of using k-fold cross-validation, compared to a single hold-out validation set?", + "answers": [ + { + "text": "It is less computationally expensive.", + "image": "" + }, + { + "text": "It is better suited for large datasets.", + "image": "" + }, + { + "text": "It makes better use of small datasets.", + "image": "" + }, + { + "text": "It always chooses the most complex model.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BiasVariance: According to the sources, which of the following represents the correct sequence of steps when using a hold-out cross validation method?", + "answers": [ + { + 
"text": "Train each model on Sdev, choose the model with lowest error on Strain, optionally evaluate on Stest.", + "image": "" + }, + { + "text": "Split S into Strain, Sdev and Stest, train each model on Strain, choose model with lowest error on Sdev, optionally evaluate on Stest.", + "image": "" + }, + { + "text": "Choose hyperparameters that work best on the test data.", + "image": "" + }, + { + "text": "Split the data into train and test, and choose hyperparameters on the test data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BiasVariance: What does the term \"empirical risk\" refer to in the context of machine learning?", + "answers": [ + { + "text": "The risk associated with the variance of a model.", + "image": "" + }, + { + "text": "The risk associated with the bias of a model.", + "image": "" + }, + { + "text": "The error of the model on the training data.", + "image": "" + }, + { + "text": "The generalization performance of the model on new, unseen data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BiasVariance: According to the sources, what is the relationship between model complexity and error?", + "answers": [ + { + "text": "As model complexity increases, error always decreases.", + "image": "" + }, + { + "text": "As model complexity decreases, error always decreases.", + "image": "" + }, + { + "text": "There is an optimal level of model complexity that results in the lowest error, typically, increasing complexity will initially decrease error and then will increase it.", + "image": "" + }, + { + "text": "Model complexity does not affect error.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What is a manifold in the context of PCA?", + "answers": [ + { + "text": "A high-dimensional space where data points are randomly scattered.", + "image": "" + }, + { + "text": "A topological space that locally resembles Euclidean space, where data may reside.", + "image": "" + }, + { + "text": "A set of basis vectors used for representing data points.", + "image": "" + }, + { + "text": "A non-linear transformation applied to the data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is a 'chart' in the context of manifolds?", + "answers": [ + { + "text": "A visual representation of data in a scatter plot.", + "image": "" + }, + { + "text": "A function that provides a one-to-one correspondence between open regions of a surface and subsets of Euclidean space.", + "image": "" + }, + { + "text": "A method for reducing the dimensionality of the data.", + "image": "" + }, + { + "text": "A way to visualize the principal components of a dataset.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is the key property of a chart mapping (ϕ)?", + "answers": [ + { + "text": "It must be non-invertible.", + "image": "" + }, + { + "text": "It must be discontinuous.", + "image": "" + }, + { + "text": "It must be smooth and invertible (a diffeomorphism).", + "image": "" + }, + { + "text": "It can be any arbitrary mapping.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: In the context of unsupervised learning, what is the primary goal?", + "answers": [ + { + "text": "To predict labels for input data.", + "image": "" + }, + { + "text": "To uncover meaningful structures or representations within the data.", + "image": "" + }, + { + "text": "To train a model with labeled outputs.", + "image": "" + }, + { + "text": 
"To use a supervised learning approach.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is a basis in the context of vector spaces?", + "answers": [ + { + "text": "A set of random vectors used for representing points.", + "image": "" + }, + { + "text": "A set of linearly independent vectors that can be used to reconstruct any point in the space.", + "image": "" + }, + { + "text": "A single vector that captures the variance of the data.", + "image": "" + }, + { + "text": "A set of vectors that overlap and point in similar directions.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is the implication of mean-centering a dataset before applying PCA?", + "answers": [ + { + "text": "It increases the variance of the data.", + "image": "" + }, + { + "text": "It shifts the data away from the origin.", + "image": "" + }, + { + "text": "It ensures the data is centered at the origin, simplifying calculations.", + "image": "" + }, + { + "text": "It makes the data more noisy.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: Why must basis vectors be linearly independent?", + "answers": [ + { + "text": "To make computations easier.", + "image": "" + }, + { + "text": "To ensure the basis vectors point in similar directions.", + "image": "" + }, + { + "text": "To ensure they span the entire space and can reconstruct any point in the space without overlap or redundancy.", + "image": "" + }, + { + "text": "Linear dependence is required in PCA.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: In a standard basis, what is unique about the weights when representing a data point?", + "answers": [ + { + "text": "The weights must be numerically solved for.", + "image": "" + }, + { + "text": "The weights are zero for every dimension.", + "image": "" + }, + { + "text": "The weights are simply the values of the data point itself.", + "image": "" + }, + { + "text": "They require complex mathematical functions for computation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What is the significance of an orthonormal basis in the context of PCA?", + "answers": [ + { + "text": "It requires more complex calculations to find the weights.", + "image": "" + }, + { + "text": "It makes the representation of a point more complicated.", + "image": "" + }, + { + "text": "It simplifies the calculation of the weight vector; it can be expressed directly in terms of the spanning set and the data itself.", + "image": "" + }, + { + "text": "It provides a non-unique basis for representing the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What is the role of the operation CC^T (where C is the basis matrix) in the context of an orthonormal basis?", + "answers": [ + { + "text": "It is a non-linear transformation.", + "image": "" + }, + { + "text": "It scales the data.", + "image": "" + }, + { + "text": "It acts as a projection matrix, ensuring data is represented by the orthonormal basis.", + "image": "" + }, + { + "text": "It adds noise to the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What happens when the number of spanning vectors (K) is less than the dimensionality of the data space (D)?", + "answers": [ + { + "text": "All points can still be perfectly represented.", + "image": "" + }, + { + "text": "The data becomes more accurate.", + "image": "" + }, + { + "text": "Points can 
only be approximated but not perfectly represented in the subspace.", + "image": "" + }, + { + "text": "The spanning vectors become linearly dependent.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What does the projection of a data point onto a subspace represent?", + "answers": [ + { + "text": "A random transformation of the original data point.", + "image": "" + }, + { + "text": "The 'dropping' of the data point perpendicularly onto the subspace defined by the basis vectors.", + "image": "" + }, + { + "text": "A transformation that moves the data point away from the subspace.", + "image": "" + }, + { + "text": "An increase in the dimensionality of the data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is learned in Principal Component Analysis (PCA) besides weights?", + "answers": [ + { + "text": "Only the weights are learned.", + "image": "" + }, + { + "text": "An appropriate basis (principal components) is also learned alongside the weights.", + "image": "" + }, + { + "text": "Non-linear transformations of the input data.", + "image": "" + }, + { + "text": "The eigenvalues of the data matrix are minimized.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is the relationship between the PCA least squares cost function and the autoencoder?", + "answers": [ + { + "text": "They are unrelated mathematical concepts.", + "image": "" + }, + { + "text": "The simplified PCA least squares cost function under orthogonality constraint is known as the autoencoder.", + "image": "" + }, + { + "text": "The autoencoder is only used for supervised learning.", + "image": "" + }, + { + "text": "The cost function is always minimized by using non-orthogonal matrices.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What are principal components?", + "answers": [ + { + "text": "Randomly chosen vectors that span the data space.", + "image": "" + }, + { + "text": "The elements of the orthonormal basis that point in the directions of the greatest variance in the dataset.", + "image": "" + }, + { + "text": "The weight vectors used to represent each point.", + "image": "" + }, + { + "text": "The eigenvalues of the data covariance matrix.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: How are principal components computed?", + "answers": [ + { + "text": "By random selection from the dataset.", + "image": "" + }, + { + "text": "As the eigenvectors of the data's correlation matrix (or covariance matrix).", + "image": "" + }, + { + "text": "By a complex non-linear optimization process.", + "image": "" + }, + { + "text": "Using only the standard basis.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: In the context of decision trees, what is the primary purpose of a 'splitting variable'?", + "answers": [ + { + "text": "To randomly select a subset of the data to be classified.", + "image": "" + }, + { + "text": "To reduce the number of neighbours in the k-NN algorithm", + "image": "" + }, + { + "text": "To divide the feature space into mutually exclusive regions", + "image": "" + }, + { + "text": "To assign weights to all the samples according to their importance.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the role of the 'misclassification rate' in the context of building a decision tree?", + "answers": [ + { + "text": "It helps to reduce the data 
dimensionality.", + "image": "" + }, + { + "text": "It determines the appropriate number of nearest neighbors in a k-NN.", + "image": "" + }, + { + "text": "It quantifies the performance of a given split", + "image": "" + }, + { + "text": "It evaluates the overall performance of the trained model", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the primary distinction between 'Tree-based methods' and 'Linear regression models' as described in the text?", + "answers": [ + { + "text": "Tree-based methods use Euclidean distance, while linear regression does not.", + "image": "" + }, + { + "text": "Linear regression models are more robust to outliers.", + "image": "" + }, + { + "text": "Tree-based methods partition the input space into rectangles whilst linear regression creates a single partitioning", + "image": "" + }, + { + "text": "Linear regression models are more computationally efficient.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: In the context of bagging, what is 'bootstrapping'?", + "answers": [ + { + "text": "The random division of the data into training and test sets.", + "image": "" + }, + { + "text": "The technique used to visualize the decision boundaries in tree-based methods", + "image": "" + }, + { + "text": "A method of randomly sampling with replacement from the original dataset", + "image": "" + }, + { + "text": "A process of feature selection that reduces the complexity of the model.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: How does 'random forest' method build individual trees that are less correlated?", + "answers": [ + { + "text": "By using only a subset of the input samples for the training.", + "image": "" + }, + { + "text": "By pruning the trees to reduce their complexity.", + "image": "" + }, + { + "text": "By randomly choosing a subset of the features at each split", + "image": "" + }, + { + "text": "By weighting the importance of the features.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the fundamental idea behind 'Boosting' as described in the text?", + "answers": [ + { + "text": "To average the predictions of multiple decision trees.", + "image": "" + }, + { + "text": "To make every tree independent from other trees.", + "image": "" + }, + { + "text": "To build an ensemble of models, where each model corrects the errors of its predecessor", + "image": "" + }, + { + "text": "To select the best performing features among all available ones.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: In the context of boosting, how are the weights of the training samples adjusted after each boosting step?", + "answers": [ + { + "text": "Weights are assigned to the samples based on their Euclidean distance from the decision boundary.", + "image": "" + }, + { + "text": "The weights are randomly re-distributed to ensure variety in the training data.", + "image": "" + }, + { + "text": "Weights are adjusted to increase the importance of misclassified samples", + "image": "" + }, + { + "text": "The weights of the training samples remain unchanged throughout the boosting process.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the objective function that is being optimized when fitting a single tree in a boosting model?", + "answers": [ + { + "text": "The misclassification rate.", + "image": "" + }, + { + 
"text": "A sum of weights of the misclassified examples", + "image": "" + }, + { + "text": "The entropy", + "image": "" + }, + { + "text": "The variance of the labels.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: In the context of gradient boosting, what does the ‘gradient’ refer to?", + "answers": [ + { + "text": "The direction of maximum increase of the loss function", + "image": "" + }, + { + "text": "The set of all training samples.", + "image": "" + }, + { + "text": "The change in the feature space", + "image": "" + }, + { + "text": "The number of nodes in the decision tree.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "Clustering: Which of the following best describes the core idea behind the k-Nearest Neighbour (k-NN) algorithm as presented in the source?", + "answers": [ + { + "text": "It partitions the feature space into rectangles.", + "image": "" + }, + { + "text": "It determines class membership by identifying the k-nearest data points to a given instance.", + "image": "" + }, + { + "text": "It applies boosting techniques to improve accuracy.", + "image": "" + }, + { + "text": "It uses a linear combination of basis functions.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: In the context of tree-based methods, which of the following is NOT a typical criterion for splitting nodes?", + "answers": [ + { + "text": "Maximizing information gain.", + "image": "" + }, + { + "text": "Minimising impurity.", + "image": "" + }, + { + "text": "Maximising the number of features.", + "image": "" + }, + { + "text": "Minimizing the weighted average impurity of child nodes", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the purpose of the 'margin' in the context of support vector machines as described in the source?", + "answers": [ + { + "text": "To ensure each data point is correctly classified.", + "image": "" + }, + { + "text": "To find a decision boundary that minimizes the number of misclassifications.", + "image": "" + }, + { + "text": "To minimize the computational complexity.", + "image": "" + }, + { + "text": "To maximize the separation between classes.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Clustering: The source mentions that the decision rule for k-NN is updated based on which aspect of the k neighbours?", + "answers": [ + { + "text": "Their distances to the decision boundary.", + "image": "" + }, + { + "text": "Their feature values.", + "image": "" + }, + { + "text": "Their class labels.", + "image": "" + }, + { + "text": "Their position in feature space.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the primary focus of 'Boosting' algorithms, according to the source?", + "answers": [ + { + "text": "To independently fit many decision trees.", + "image": "" + }, + { + "text": "To iteratively fit weak learners while focusing on misclassified instances from previous iterations.", + "image": "" + }, + { + "text": "To linearly separate data into different classes.", + "image": "" + }, + { + "text": "To find the optimal decision boundary using a kernel trick.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: In the context of boosting, what is meant by “reweighting” training data?", + "answers": [ + { + "text": "It’s where the values of features are adjusted.", + "image": "" + }, + { + "text": "It is the 
process of re-assigning training samples to different classes.", + "image": "" + }, + { + "text": "It's adjusting the weights of the linear function.", + "image": "" + }, + { + "text": "It means increasing the weight of instances that are harder to classify.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Clustering: According to the source, what is a 'weak learner' in the context of boosting algorithms?", + "answers": [ + { + "text": "A model that achieves very low training error.", + "image": "" + }, + { + "text": "A model that performs slightly better than random guessing.", + "image": "" + }, + { + "text": "A complex model with high capacity.", + "image": "" + }, + { + "text": "A model that is prone to overfitting.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: What is the main objective of the 'objective function' mentioned in the section about boosting?", + "answers": [ + { + "text": "To minimize the number of training samples.", + "image": "" + }, + { + "text": "To maximize the margin between classes.", + "image": "" + }, + { + "text": "To minimize the empirical risk (loss) based on the training data.", + "image": "" + }, + { + "text": "To maximize the number of iterations.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: According to the source, what is the rationale behind 'regularization' when building the objective function in boosting?", + "answers": [ + { + "text": "To speed up training time.", + "image": "" + }, + { + "text": "To simplify the data.", + "image": "" + }, + { + "text": "To avoid overfitting.", + "image": "" + }, + { + "text": "To convert linear to non-linear problems.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "An image is defined as:", + "answers": [ + { + "text": "A collection of coloured dots.", + "image": "" + }, + { + "text": "A two-dimensional array of numerical values.", + "image": "" + }, + { + "text": "A function that maps pixel locations to intensity values.", + "image": "" + }, + { + "text": "A visual representation of objects.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "How many colour combinations are possible for a single pixel in an RGB image, where each colour channel (Red, Green, Blue) has values ranging from 0 to 255?", + "answers": [ + { + "text": "256", + "image": "" + }, + { + "text": "65,536", + "image": "" + }, + { + "text": "16,777,216", + "image": "" + }, + { + "text": "1,048,576", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "What is the primary function of image filtering?", + "answers": [ + { + "text": "To change the colour palette of an image.", + "image": "" + }, + { + "text": "To alter the pixel locations within an image.", + "image": "" + }, + { + "text": "To apply a function to the pixels of an image, without changing their positions.", + "image": "" + }, + { + "text": "To compress the size of an image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Which of the following is NOT a typical application of image filtering?", + "answers": [ + { + "text": "Image deblurring.", + "image": "" + }, + { + "text": "Improving contrast.", + "image": "" + }, + { + "text": "Noise reduction.", + "image": "" + }, + { + "text": "Increasing image resolution.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "What is the mathematical operation at the core of 2D convolutions?", + "answers": [ + { + "text":
"Subtraction and division.", + "image": "" + }, + { + "text": "Element-wise matrix multiplication and summation.", + "image": "" + }, + { + "text": "Vector dot product.", + "image": "" + }, + { + "text": "Matrix inversion.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "If the kernel was not flipped in a 2D convolution, the operation would be a:", + "answers": [ + { + "text": "Matrix transpose.", + "image": "" + }, + { + "text": "Cross-correlation.", + "image": "" + }, + { + "text": "Dot product.", + "image": "" + }, + { + "text": "Linear transformation.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "What is the purpose of padding in 2D convolutions?", + "answers": [ + { + "text": "To reduce the computational cost of the convolution.", + "image": "" + }, + { + "text": "To maintain the same output dimensions as the input.", + "image": "" + }, + { + "text": "To sharpen the image.", + "image": "" + }, + { + "text": "To blur the image.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "An identity kernel in image filtering will:", + "answers": [ + { + "text": "Sharpen the edges of an image.", + "image": "" + }, + { + "text": "Leave the image unchanged.", + "image": "" + }, + { + "text": "Blur the image significantly.", + "image": "" + }, + { + "text": "Invert the colours of the image.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "A mean blur kernel works by:", + "answers": [ + { + "text": "Multiplying the pixel value by a constant.", + "image": "" + }, + { + "text": "Amplifying the difference between a pixel and its neighbors.", + "image": "" + }, + { + "text": "Averaging a pixel with its surrounding neighbours.", + "image": "" + }, + { + "text": "Giving greater weight to the centre pixel.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Which filter is known for weighting nearby pixels more heavily than distant ones, leading to a more natural-looking blur?", + "answers": [ + { + "text": "Mean blur filter.", + "image": "" + }, + { + "text": "Sharpening kernel.", + "image": "" + }, + { + "text": "Identity kernel.", + "image": "" + }, + { + "text": "Gaussian blur filter.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Which of the following is a property of a Gaussian filter?", + "answers": [ + { + "text": "Non-rotational symmetry.", + "image": "" + }, + { + "text": "It weights distant pixels more than nearby ones.", + "image": "" + }, + { + "text": "Rotational symmetry.", + "image": "" + }, + { + "text": "It enhances the noise in an image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "What is the separable property of a filter?", + "answers": [ + { + "text": "Applying the filter only to certain parts of the image.", + "image": "" + }, + { + "text": "First convolving rows with a 1D filter, then columns with a 1D filter.", + "image": "" + }, + { + "text": "Applying different filters to different image channels.", + "image": "" + }, + { + "text": "Convolution with multiple kernels", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "In the context of a Gaussian pyramid, what is the first step after starting with the original image?", + "answers": [ + { + "text": "Downsampling the image.", + "image": "" + }, + { + "text": "Applying a Gaussian blur.", + "image": "" + }, + { + "text": "Upsampling the image.", + "image": "" + }, + { + "text": "Applying a sharpening filter.", + "image": 
"" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "How does downsampling change the image size in each level of a Gaussian pyramid?", + "answers": [ + { + "text": "Reduces the size by a factor of 2.", + "image": "" + }, + { + "text": "Reduces the size by a factor of 3.", + "image": "" + }, + { + "text": "Reduces the size by a factor of 4.", + "image": "" + }, + { + "text": "Reduces the size by a factor of 8.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "What is the primary reason for applying a Gaussian blur before downsampling in a Gaussian pyramid?", + "answers": [ + { + "text": "To increase the resolution of the image.", + "image": "" + }, + { + "text": "To sharpen the image before resizing.", + "image": "" + }, + { + "text": "To act as a low-pass filter and prevent aliasing.", + "image": "" + }, + { + "text": "To make the image more colourful.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "What is aliasing, as discussed in the context of image downsampling?", + "answers": [ + { + "text": "The effect of making the image sharper.", + "image": "" + }, + { + "text": "Distortions in the downsampled image caused by undersampling high-frequency components.", + "image": "" + }, + { + "text": "The effect of applying a blur.", + "image": "" + }, + { + "text": "A form of image compression.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: Which of the following best describes the fundamental concept of computer vision, as presented in the material?", + "answers": [ + { + "text": "Generating arrays of numbers that resemble real-world objects, like fruits.", + "image": "" + }, + { + "text": "Solving the 'inverse graphics' problem by inferring the structure of the world from visual cues.", + "image": "" + }, + { + "text": "Creating digital images using a pinhole camera model and digitizers.", + "image": "" + }, + { + "text": "Recognising objects by matching 2D image fragments and their configurations.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: The 'trompe l’oeil' examples in the text primarily illustrate which aspect of computer vision?", + "answers": [ + { + "text": "The challenges of object recognition in cluttered scenes.", + "image": "" + }, + { + "text": "The use of color and shading to create realistic images.", + "image": "" + }, + { + "text": "The exploitation of depth-perception cues and their mathematical modeling.", + "image": "" + }, + { + "text": "The importance of prior expectations in image interpretation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the provided text, what is a key characteristic of 'basic level categories' in object recognition?", + "answers": [ + { + "text": "They represent the most detailed classification of objects.", + "image": "" + }, + { + "text": "They are the categories that are most difficult for humans to identify quickly.", + "image": "" + }, + { + "text": "They are culturally dependent without any consistency.", + "image": "" + }, + { + "text": "They represent the highest level at which category members have similar perceived shapes and are easily recognized by humans.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "CV Basics: What is the primary purpose of image filtering, as described in the material?", + "answers": [ + { + "text": "To increase the amount of noise in an image to make edges more apparent.", + "image": "" + 
}, + { + "text": "To create 3D models of objects from 2D images.", + "image": "" + }, + { + "text": "To enhance image quality, extract features, and reduce noise.", + "image": "" + }, + { + "text": "To generate new images using the principles of graphics.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: In the context of 2D convolution, which of the following steps is essential?", + "answers": [ + { + "text": "Rotating the filter kernel by 90 degrees.", + "image": "" + }, + { + "text": "Mirroring the filter kernel before applying it to the image.", + "image": "" + }, + { + "text": "Applying a non-linear function to the local image patch.", + "image": "" + }, + { + "text": "Only summing the values without multiplication.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: What does the text say about linear systems in the context of image processing?", + "answers": [ + { + "text": "They are used only for non-linear filtering.", + "image": "" + }, + { + "text": "They are characterized by a lack of superposition.", + "image": "" + }, + { + "text": "They exhibit properties such as homogeneity, additivity, and superposition.", + "image": "" + }, + { + "text": "They cannot be represented by matrix operations.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the source material, why is Gaussian averaging preferred over a simple box filter for smoothing?", + "answers": [ + { + "text": "Because box filters are computationally more expensive.", + "image": "" + }, + { + "text": "Because box filters do not reduce noise effectively.", + "image": "" + }, + { + "text": "Because Gaussian averaging gives more weight to nearby pixels, modelling probabilistic inference.", + "image": "" + }, + { + "text": "Because box filters are not separable and therefore cannot be implemented efficiently.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the main problem caused by subsampling without average filtering, according to the text?", + "answers": [ + { + "text": "It makes the image smoother.", + "image": "" + }, + { + "text": "It increases the resolution of the image.", + "image": "" + }, + { + "text": "It leads to aliasing, introducing artifacts in the image.", + "image": "" + }, + { + "text": "It preserves high-frequency information more accurately.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the significance of the 'derivative of Gaussian' in edge detection, according to the source?", + "answers": [ + { + "text": "It is used to enhance noise and amplify variations in the image.", + "image": "" + }, + { + "text": "It directly extracts lines and edges without the need for smoothing.", + "image": "" + }, + { + "text": "It is an approximation of the optimal edge detector under certain assumptions (linear filtering and additive Gaussian noise).", + "image": "" + }, + { + "text": "It is a simplified method used to avoid complex calculations in edge detection.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: In the context of edge detection, what is the role of \"non-maximum suppression\"?", + "answers": [ + { + "text": "To amplify the noise near edges.", + "image": "" + }, + { + "text": "To smooth out the detected edges.", + "image": "" + }, + { + "text": "To thin edges by choosing the largest gradient magnitude along the gradient direction.", + "image": "" + }, 
+ { + "text": "To detect edges at different scales and combine them into a single map.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the Laplacian operator, as presented in the text, and what is it used for?", + "answers": [ + { + "text": "It is a filter that calculates the gradient magnitude of an image.", + "image": "" + }, + { + "text": "It is a smoothing filter that reduces high-frequency information.", + "image": "" + }, + { + "text": "It is a linear filter used to detect edges by identifying zero-crossings of the second derivative.", + "image": "" + }, + { + "text": "It is a filter that is used for color histogram generation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is a primary motivation for using color histograms for object recognition?", + "answers": [ + { + "text": "They are sensitive to geometric transformations.", + "image": "" + }, + { + "text": "They require perfect segmentation of objects.", + "image": "" + }, + { + "text": "They are computationally expensive, however, this is offset by the quality of recognition they provide.", + "image": "" + }, + { + "text": "They are relatively invariant to object translations, image rotations, and partial occlusions.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "CV Basics: According to the source, what is a limitation of using color histograms for object recognition?", + "answers": [ + { + "text": "They cannot be used for deformable objects such as pullovers.", + "image": "" + }, + { + "text": "They require a large number of training views per object.", + "image": "" + }, + { + "text": "They can be sensitive to changes in illumination conditions.", + "image": "" + }, + { + "text": "They perform poorly when objects are partially occluded.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: Which of the following statements accurately describes the 'Intersection' method for comparing histograms?", + "answers": [ + { + "text": "It calculates the differences between corresponding histogram cells.", + "image": "" + }, + { + "text": "It gives a higher score when there is minimal overlap between histograms.", + "image": "" + }, + { + "text": "It measures the common part of both histograms, with a range between 0 and 1.", + "image": "" + }, + { + "text": "It weights all histogram cells equally regardless of their significance.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: In the context of performance evaluation, what does a confusion matrix help to determine?", + "answers": [ + { + "text": "The optimal parameters for an image filtering algorithm.", + "image": "" + }, + { + "text": "The best method for comparing color histograms.", + "image": "" + }, + { + "text": "The number of true positives, true negatives, false positives, and false negatives for a given classifier and threshold.", + "image": "" + }, + { + "text": "The area under the ROC curve for a specific model.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What does the term \"recall\" measure in the context of performance evaluation?", + "answers": [ + { + "text": "The proportion of correctly identified negative cases.", + "image": "" + }, + { + "text": "The proportion of actual positives that are correctly identified.", + "image": "" + }, + { + "text": "The overall accuracy of the classification model.", + "image": "" + }, + { + "text": "The 
proportion of false alarms in the classification process.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: In the context of ROC curves, what does the True Positive Rate (TPR) represent?", + "answers": [ + { + "text": "The rate of false alarms for a given threshold.", + "image": "" + }, + { + "text": "The proportion of correctly identified negative cases.", + "image": "" + }, + { + "text": "The proportion of actual positives that are correctly identified.", + "image": "" + }, + { + "text": "The overall accuracy of the classification model.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the significance of the Area Under the ROC Curve (AUROC)?", + "answers": [ + { + "text": "It represents the trade-off between precision and recall for a classification model.", + "image": "" + }, + { + "text": "It indicates how well a classifier distinguishes between two classes, with a higher AUROC suggesting better performance.", + "image": "" + }, + { + "text": "It helps choose the best comparison method for color histograms.", + "image": "" + }, + { + "text": "It is used to determine the optimal threshold for object detection algorithms.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: According to the material, why is the precision-recall curve preferred for detection tasks?", + "answers": [ + { + "text": "Because it does not require any threshold.", + "image": "" + }, + { + "text": "Because it is less sensitive to noise than other performance metrics.", + "image": "" + }, + { + "text": "Because it is better suited when the number of true negatives is not well-defined, such as in detection tasks.", + "image": "" + }, + { + "text": "Because it gives more importance to the true negative rate.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: Leonardo da Vinci's observations about the camera obscura, as described in the text, highlight which fundamental principle of image formation?", + "answers": [ + { + "text": "The principle of digital image processing.", + "image": "" + }, + { + "text": "The formation of a reversed and reduced image through a small aperture.", + "image": "" + }, + { + "text": "The concept of linear filtering in image enhancement.", + "image": "" + }, + { + "text": "The use of color histograms for object recognition.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: The text refers to computer vision as the problem of 'inverse graphics'. 
What does this imply about the goals of computer vision?", + "answers": [ + { + "text": "Computer vision aims to generate images that are indistinguishable from real-world scenes.", + "image": "" + }, + { + "text": "Computer vision seeks to create digital images by using the pinhole camera model.", + "image": "" + }, + { + "text": "Computer vision tries to infer the properties of the world from images, reversing the process of graphics which creates images from the world.", + "image": "" + }, + { + "text": "Computer vision focuses on the analysis of color histograms for object identification.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the text, what is the significance of the 'pictorial structure' model in object recognition?", + "answers": [ + { + "text": "It only uses 3D models for object recognition.", + "image": "" + }, + { + "text": "It relies on color histograms to identify objects.", + "image": "" + }, + { + "text": "It represents objects as combinations of 2D image fragments and their configurations.", + "image": "" + }, + { + "text": "It is a simple method that can overcome all complexities of object recognition.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What does the material say about the challenges of visual categorization?", + "answers": [ + { + "text": "They are limited to problems with occlusions.", + "image": "" + }, + { + "text": "They are not affected by multi-scale, multi-view variations.", + "image": "" + }, + { + "text": "They include issues such as multi-scale, multi-view, multi-class, varying illumination, occlusion, cluttered backgrounds, articulation, and high intraclass variance/low interclass variance.", + "image": "" + }, + { + "text": "They are easily solved by basic linear filtering techniques.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the role of a 'filter kernel' in the context of image filtering?", + "answers": [ + { + "text": "It is used to digitize analog images.", + "image": "" + }, + { + "text": "It is only useful for non-linear operations.", + "image": "" + }, + { + "text": "It is a small matrix that is used to apply some function to a local image patch during convolution.", + "image": "" + }, + { + "text": "It represents the output image, after applying the convolution.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the source material, what is the primary goal of using linear filtering for smoothing an image?", + "answers": [ + { + "text": "To enhance the edges and details in an image.", + "image": "" + }, + { + "text": "To create a sharper version of the image.", + "image": "" + }, + { + "text": "To reduce noise and fill in missing information.", + "image": "" + }, + { + "text": "To perform non-linear operations on an image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: Why is the concept of 'separability' important in the context of Gaussian filtering?", + "answers": [ + { + "text": "Because it makes the filter non-linear.", + "image": "" + }, + { + "text": "Because it allows for efficient implementation of the filtering operation by applying 1D filters sequentially.", + "image": "" + }, + { + "text": "Because it increases the smoothing effect on an image.", + "image": "" + }, + { + "text": "Because it reduces the computational cost of applying a box filter.", + "image": "" + } + ], + "correct": 1, + 
"image": "" + }, + { + "quest": "CV Basics: What is the main idea behind using a Gaussian pyramid for multi-scale image representation?", + "answers": [ + { + "text": "To reduce the resolution of images for easier processing.", + "image": "" + }, + { + "text": "To apply linear filtering in a single scale.", + "image": "" + }, + { + "text": "To represent an image at different scales by repeated smoothing and subsampling.", + "image": "" + }, + { + "text": "To compute the 2nd derivative of an image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: In the context of edge detection, why is smoothing an image prior to computing derivatives beneficial?", + "answers": [ + { + "text": "It enhances the noise, to see edges more clearly.", + "image": "" + }, + { + "text": "It ensures edges are not affected by lighting changes.", + "image": "" + }, + { + "text": "It reduces the impact of noise and small variations, which can interfere with detecting true edges.", + "image": "" + }, + { + "text": "It makes the edges thicker and more visible.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the text, what does the magnitude of the gradient measure in edge detection?", + "answers": [ + { + "text": "The direction of the edge.", + "image": "" + }, + { + "text": "The noise level around an edge.", + "image": "" + }, + { + "text": "The strength of an edge.", + "image": "" + }, + { + "text": "The scale of the image where edges are more evident.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the main advantage of using the Canny edge detector over other edge detection methods, according to the text?", + "answers": [ + { + "text": "It is faster and less computationally intensive than other methods.", + "image": "" + }, + { + "text": "It is an approximation of the optimal edge detector under the assumptions of linear filtering and additive Gaussian noise, offering a good trade-off between detection and localization.", + "image": "" + }, + { + "text": "It does not require any parameter tuning for different images.", + "image": "" + }, + { + "text": "It is simpler to implement and more robust in noisy conditions.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: In the context of edge detection using the Laplacian, what are 'zero-crossings' and what do they indicate?", + "answers": [ + { + "text": "They indicate the location of the maximum gradient value.", + "image": "" + }, + { + "text": "They indicate the strength of an edge in an image.", + "image": "" + }, + { + "text": "They are used to calculate color histograms in an image.", + "image": "" + }, + { + "text": "They represent points where the second derivative changes sign, which indicates the location of edges.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "CV Basics: According to the text, what is a key characteristic of appearance-based object identification/recognition?", + "answers": [ + { + "text": "It relies on explicit 3D models of objects.", + "image": "" + }, + { + "text": "It requires perfect segmentation of the object in the image.", + "image": "" + }, + { + "text": "It represents objects by a collection of 2D images without the need for a 3D model, and it is sufficient to compare the 2D appearances.", + "image": "" + }, + { + "text": "It is invariant to changes in the viewing angle.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + 
"quest": "CV Basics: What does the material say about the use of color in object recognition?", + "answers": [ + { + "text": "Color changes under geometric transformations and therefore is not a reliable feature.", + "image": "" + }, + { + "text": "Color is a global feature that is robust to occlusions.", + "image": "" + }, + { + "text": "Color is a local feature that remains relatively constant under geometric transformations and is robust to partial occlusions.", + "image": "" + }, + { + "text": "Color cannot be used for recognition because it is very sensitive to light variations.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What does a 3D (joint) color histogram represent?", + "answers": [ + { + "text": "The 1D count of pixels of individual R, G, B colors, and luminance.", + "image": "" + }, + { + "text": "The color normalized by intensity.", + "image": "" + }, + { + "text": "The count of pixels for each combination of RGB values.", + "image": "" + }, + { + "text": "A 2D representation of color, for example, using two parameters, r and g.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the text, what is the significance of using a 'chromatic representation' of color?", + "answers": [ + { + "text": "It ensures that the color histograms do not change under rotation.", + "image": "" + }, + { + "text": "It guarantees that the color histogram is robust to occlusion.", + "image": "" + }, + { + "text": "It normalizes colors by intensity, focusing on the color itself rather than its brightness.", + "image": "" + }, + { + "text": "It generates an intensity image that is later used to extract color information.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What does the Euclidean distance measure in the context of histogram comparison?", + "answers": [ + { + "text": "It measures the differences between the histograms, weighting each cell equally.", + "image": "" + }, + { + "text": "It measures the common part of both histograms.", + "image": "" + }, + { + "text": "It measures if two distributions are statistically different, with a focus on outliers.", + "image": "" + }, + { + "text": "It only measures the distance between the central cells of two histograms.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "CV Basics: In the context of histogram comparison, what does the Chi-square measure primarily aim to test?", + "answers": [ + { + "text": "The overlap between the histograms of known objects and a test image.", + "image": "" + }, + { + "text": "The distances between the centers of two histograms.", + "image": "" + }, + { + "text": "Whether two distributions are statistically different.", + "image": "" + }, + { + "text": "If the two images can be considered the same object.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the 'nearest-neighbor' strategy for object recognition using histograms, as described in the text?", + "answers": [ + { + "text": "It measures the distance between objects using the Euclidean distance.", + "image": "" + }, + { + "text": "It focuses on the differences between histograms using a Chi-squared measure.", + "image": "" + }, + { + "text": "It looks for the perfect overlap of two histograms.", + "image": "" + }, + { + "text": "It compares a test histogram to a set of known object histograms and selects the one with the best matching score.", + "image": "" + } + 
], + "correct": 3, + "image": "" + }, + { + "quest": "CV Basics: According to the material, what is the 'color constancy problem' that affects color histograms?", + "answers": [ + { + "text": "It refers to the fact that colors cannot be used in image recognition.", + "image": "" + }, + { + "text": "It describes a scenario where objects have the same color distribution.", + "image": "" + }, + { + "text": "It is the problem of pixel colors changing due to the illumination conditions.", + "image": "" + }, + { + "text": "It is a problem of color histograms that arises because not all objects can be identified by their color distribution.", + "image": "" + } + ], + "correct": 2, + "image": "" + } +] \ No newline at end of file