From e09c0022a102ea2f61acebad9a456ca7290f73c5 Mon Sep 17 00:00:00 2001 From: Marco Realacci Date: Wed, 15 Jan 2025 03:35:13 +0100 Subject: [PATCH] add fds AI generated questions --- data/questions/fds_llm.json | 6749 +++++++++++++++++++++++++++++++++++ 1 file changed, 6749 insertions(+) create mode 100644 data/questions/fds_llm.json diff --git a/data/questions/fds_llm.json b/data/questions/fds_llm.json new file mode 100644 index 0000000..c5f23c0 --- /dev/null +++ b/data/questions/fds_llm.json @@ -0,0 +1,6749 @@ +[ + { + "quest": "Autoencoders: What is the purpose of the hidden layer in an autoencoder, which typically has a smaller dimensionality than the input layer?", + "answers": [ + { + "text": "To expand the input data to a higher-dimensional space.", + "image": "" + }, + { + "text": "To learn a compressed and efficient representation of the input data.", + "image": "" + }, + { + "text": "To apply a non-linear transformation to the input.", + "image": "" + }, + { + "text": "To add noise to the input data for robustness.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, why is the constraint of a smaller hidden layer important in an autoencoder?", + "answers": [ + { + "text": "It allows the network to memorize the input.", + "image": "" + }, + { + "text": "It forces the network to learn meaningful patterns and structures.", + "image": "" + }, + { + "text": "It speeds up the training process.", + "image": "" + }, + { + "text": "It prevents overfitting.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: Which of the following is NOT a typical use case for autoencoders mentioned in the source?", + "answers": [ + { + "text": "Dimensionality reduction for visualization.", + "image": "" + }, + { + "text": "Data compression.", + "image": "" + }, + { + "text": "Feature learning for downstream tasks.", + "image": "" + }, + { + "text": "Supervised classification with labeled data.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: What does the source say about using autoencoders with unlabeled data?", + "answers": [ + { + "text": "They are not suitable for unlabeled data.", + "image": "" + }, + { + "text": "They require labeled data to extract useful patterns.", + "image": "" + }, + { + "text": "They can leverage large amounts of unlabeled data to extract useful patterns.", + "image": "" + }, + { + "text": "They are only useful for supervised learning tasks", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: In a linear autoencoder, what is the relationship between the input data (x) and the reconstructed output (x̂), according to the source?", + "answers": [ + { + "text": "x̂ = VxU", + "image": "" + }, + { + "text": "x̂ = UVx", + "image": "" + }, + { + "text": "x̂ = x²", + "image": "" + }, + { + "text": "x̂ = x + noise", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the encoded representation of x in a linear autoencoder?", + "answers": [ + { + "text": "UVx", + "image": "" + }, + { + "text": "x̂", + "image": "" + }, + { + "text": "Vx", + "image": "" + }, + { + "text": "CCTx", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, what is the main drawback of a linear autoencoder?", + "answers": [ + { + "text": "It cannot be used for dimensionality reduction.", + "image": "" + }, + { + "text": "It performs 
poorly with non-linear data.", + "image": "" + }, + { + "text": "It requires extensive computational resources.", + "image": "" + }, + { + "text": "The mapping is linear, limiting its ability to capture complex non-linear data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: In the context of the source, what is Principal Component Analysis (PCA) equivalent to?", + "answers": [ + { + "text": "A deep nonlinear autoencoder.", + "image": "" + }, + { + "text": "A sparse autoencoder.", + "image": "" + }, + { + "text": "A linear autoencoder with a single hidden layer and linear activation functions.", + "image": "" + }, + { + "text": "A denoising autoencoder.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What are Eigenfaces, as described in the source?", + "answers": [ + { + "text": "A specific type of autoencoder architecture.", + "image": "" + }, + { + "text": "Randomly generated facial images.", + "image": "" + }, + { + "text": "The principal components (or eigenvectors) of a large set of facial images.", + "image": "" + }, + { + "text": "Non-linear transformations of facial images.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, what does the value of the eigenvalues in PCA represent?", + "answers": [ + { + "text": "The amount of noise present in the image.", + "image": "" + }, + { + "text": "The size of the dataset.", + "image": "" + }, + { + "text": "The amount of variance each eigenface captures.", + "image": "" + }, + { + "text": "The total number of faces in the dataset.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: How are the weights (or coefficients) for each eigenface found, according to the source?", + "answers": [ + { + "text": "By random initialization.", + "image": "" + }, + { + "text": "By applying a non-linear activation function.", + "image": "" + }, + { + "text": "By projecting the original face onto the eigenfaces.", + "image": "" + }, + { + "text": "By calculating the mean of the image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: Which of the following is a limitation of using eigenfaces for face recognition as mentioned in the source?", + "answers": [ + { + "text": "They are not sensitive to facial expressions.", + "image": "" + }, + { + "text": "They are highly effective with any data set", + "image": "" + }, + { + "text": "They can capture any non-linear variation of a face", + "image": "" + }, + { + "text": "They are sensitive to variations in lighting, pose, and facial expressions.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: What distinguishes a deep autoencoder from a linear autoencoder?", + "answers": [ + { + "text": "A deep autoencoder uses a single hidden layer with linear activation.", + "image": "" + }, + { + "text": "A deep autoencoder projects data onto a non-linear manifold, instead of a subspace.", + "image": "" + }, + { + "text": "A deep autoencoder is only useful for supervised learning tasks", + "image": "" + }, + { + "text": "A deep autoencoder uses only labeled data", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the key characteristic of an undercomplete autoencoder?", + "answers": [ + { + "text": "The hidden layer has more units than the input layer.", + "image": "" + }, + { + "text": "The embedded space has a lower 
dimensionality than the input space.", + "image": "" + }, + { + "text": "The model overfits to the training data.", + "image": "" + }, + { + "text": "The hidden layer uses non-linear activation.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the main challenge with overcomplete autoencoders?", + "answers": [ + { + "text": "They are difficult to train.", + "image": "" + }, + { + "text": "They perform poorly with complex data.", + "image": "" + }, + { + "text": "Without proper constraints, they can overfit by simply copying the input to the output.", + "image": "" + }, + { + "text": "They cannot capture the data's hidden structure.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What is a stacked autoencoder?", + "answers": [ + { + "text": "An autoencoder with a very small hidden layer.", + "image": "" + }, + { + "text": "An autoencoder consisting of multiple encoding and decoding layers.", + "image": "" + }, + { + "text": "An autoencoder that can only learn linear features.", + "image": "" + }, + { + "text": "An autoencoder that operates exclusively on labeled data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, what does the layer-wise training of a stacked autoencoder achieve?", + "answers": [ + { + "text": "It leads to overfitting.", + "image": "" + }, + { + "text": "It makes training more complex.", + "image": "" + }, + { + "text": "It simplifies optimization and ensures meaningful feature learning at each step.", + "image": "" + }, + { + "text": "It eliminates the need for pre-training.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What is a denoising autoencoder (DAE) designed to do?", + "answers": [ + { + "text": "To compress data without loss.", + "image": "" + }, + { + "text": "To learn robust, noise-resistant representations by reconstructing clean data from noisy inputs.", + "image": "" + }, + { + "text": "To reduce the dimensionality of the input data.", + "image": "" + }, + { + "text": "To only use clean data as input.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: How does a denoising autoencoder achieve noise-resistance?", + "answers": [ + { + "text": "By reducing the size of the hidden layer.", + "image": "" + }, + { + "text": "By training on noisy data and reconstructing the clean version.", + "image": "" + }, + { + "text": "By adding random noise to the output.", + "image": "" + }, + { + "text": "By adding labels to the input data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is a key characteristic of a sparse autoencoder?", + "answers": [ + { + "text": "The hidden layer has fewer units than the input.", + "image": "" + }, + { + "text": "Most hidden units should have zero activation.", + "image": "" + }, + { + "text": "It uses only labeled data.", + "image": "" + }, + { + "text": "It maps the data onto a linear subspace.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the purpose of the sparsity penalty in a sparse autoencoder?", + "answers": [ + { + "text": "To increase the complexity of the model.", + "image": "" + }, + { + "text": "To encourage most hidden units to be inactive.", + "image": "" + }, + { + "text": "To increase the amount of noise", + "image": "" + }, + { + "text": "To reduce the reconstruction error to zero.", + 
"image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, what does the Kullback-Leibler (KL) divergence measure in the context of sparse autoencoders?", + "answers": [ + { + "text": "The reconstruction error.", + "image": "" + }, + { + "text": "The amount of noise in the input data.", + "image": "" + }, + { + "text": "The difference between the average activation of hidden units and a target sparsity value.", + "image": "" + }, + { + "text": "The dimensionality of the latent space.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: a) Classify data into predefined categories.", + "answers": [ + { + "text": "Classify data into predefined categories.", + "image": "" + }, + { + "text": "Reconstruct its input by predicting an approximation.", + "image": "" + }, + { + "text": "Generate new data samples similar to the input.", + "image": "" + }, + { + "text": "Reduce the dimensionality of the data for visualization purposes only.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: The 'bottleneck' in an autoencoder architecture refers to:", + "answers": [ + { + "text": "The activation function used in the output layer.", + "image": "" + }, + { + "text": "The hidden layer with a significantly smaller dimensionality than the input.", + "image": "" + }, + { + "text": "The initial weight matrices of the encoder and decoder.", + "image": "" + }, + { + "text": "The loss function that the network tries to minimize.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the primary purpose of the dimensionality constraint in autoencoders?", + "answers": [ + { + "text": "To increase the computational speed of the network.", + "image": "" + }, + { + "text": "To force the network to learn meaningful patterns and structures in the data instead of memorising the input.", + "image": "" + }, + { + "text": "To allow the network to handle high dimensional input data.", + "image": "" + }, + { + "text": "To reduce the risk of overfitting.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: Which of the following is NOT a typical application of autoencoders?", + "answers": [ + { + "text": "Data compression.", + "image": "" + }, + { + "text": "Feature learning for downstream tasks.", + "image": "" + }, + { + "text": "Supervised classification with labelled data.", + "image": "" + }, + { + "text": "Dimensionality reduction for visualisation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: A linear autoencoder, which uses linear activation functions, is functionally equivalent to:", + "answers": [ + { + "text": "A deep neural network.", + "image": "" + }, + { + "text": "Principal Component Analysis (PCA).", + "image": "" + }, + { + "text": "A convolutional neural network.", + "image": "" + }, + { + "text": "A recurrent neural network.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: In a linear autoencoder, the reconstruction of the input is computed as a linear transformation expressed as:", + "answers": [ + { + "text": "x̂ = U + V + x", + "image": "" + }, + { + "text": "x̂ = UVx", + "image": "" + }, + { + "text": "x̂ = U * V * x", + "image": "" + }, + { + "text": "x̂ = V / U * x", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: According to the source, a linear autoencoder learns to 
choose a subspace that:", + "answers": [ + { + "text": "Maximises the distance of the data points from the projections.", + "image": "" + }, + { + "text": "Minimises the variance of the projections.", + "image": "" + }, + { + "text": "Minimises the squared distance from the data to the projections and maximises the variance of the projections.", + "image": "" + }, + { + "text": "Is randomly generated each training cycle.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: Eigenfaces are:", + "answers": [ + { + "text": "The result of applying non-linear transformations to faces.", + "image": "" + }, + { + "text": "The principal components (or eigenvectors) of a large set of facial images.", + "image": "" + }, + { + "text": "A set of facial images created by averaging the original dataset.", + "image": "" + }, + { + "text": "A specific type of neural network.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What is the purpose of centering the data (subtracting the mean) before performing PCA on a set of face images?", + "answers": [ + { + "text": "To increase the variance of the data", + "image": "" + }, + { + "text": "To reduce the noise in the data", + "image": "" + }, + { + "text": "To standardise the range of pixel values", + "image": "" + }, + { + "text": "To ensure that the data is centered around zero", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: When recognising faces using eigenfaces, each face is represented as:", + "answers": [ + { + "text": "A new image composed from a random set of pixels", + "image": "" + }, + { + "text": "A set of binary digits", + "image": "" + }, + { + "text": "A compressed image that has a lower resolution", + "image": "" + }, + { + "text": "A weighted sum of eigenfaces", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: Which of the following is a limitation of using eigenfaces for facial recognition?", + "answers": [ + { + "text": "Their high computational complexity.", + "image": "" + }, + { + "text": "Their inability to generalize to different people.", + "image": "" + }, + { + "text": "Their effectiveness is not dependent on the training data quality.", + "image": "" + }, + { + "text": "Their sensitivity to variations in lighting, pose, and facial expressions.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: Deep autoencoders project the data onto a:", + "answers": [ + { + "text": "Linear subspace", + "image": "" + }, + { + "text": "Nonlinear manifold", + "image": "" + }, + { + "text": "Randomly generated vector space", + "image": "" + }, + { + "text": "Discrete set of points", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: An undercomplete autoencoder is characterized by:", + "answers": [ + { + "text": "A hidden layer larger than the input layer", + "image": "" + }, + { + "text": "The absence of regularization techniques", + "image": "" + }, + { + "text": "A hidden layer smaller than the input layer", + "image": "" + }, + { + "text": "A tendency to overfit the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What is the main challenge associated with overcomplete autoencoders?", + "answers": [ + { + "text": "Difficulty in learning any meaningful features.", + "image": "" + }, + { + "text": "Tendency to underfit the data due to low capacity.", + "image": "" + }, + { 
+ "text": "Overfitting by simply copying the input to the output", + "image": "" + }, + { + "text": "Difficulty with using regularization techniques", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: A stacked autoencoder introduces which property to the network?", + "answers": [ + { + "text": "Linear transformations", + "image": "" + }, + { + "text": "Reduced computational costs", + "image": "" + }, + { + "text": "Hierarchical representations of the input data", + "image": "" + }, + { + "text": "The ability to only use labelled training data", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: In the simplified training of a stacked autoencoder:", + "answers": [ + { + "text": "All layers are trained simultaneously", + "image": "" + }, + { + "text": "The layers are only trained with labelled data", + "image": "" + }, + { + "text": "The output of one layer is used as input to train the next", + "image": "" + }, + { + "text": "All layers share the same weight matrices.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Autoencoders: What is a key feature of denoising autoencoders (DAE)?", + "answers": [ + { + "text": "They only use labelled training data.", + "image": "" + }, + { + "text": "They are trained to generate new data", + "image": "" + }, + { + "text": "They are trained to directly reconstruct the input without using any noise", + "image": "" + }, + { + "text": "They are trained to reconstruct clean data from noisy inputs", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: Sparse autoencoders enforce sparsity by:", + "answers": [ + { + "text": "Ensuring that all hidden units have non-zero activation.", + "image": "" + }, + { + "text": "Removing some hidden units during training.", + "image": "" + }, + { + "text": "Reducing the size of the hidden layer.", + "image": "" + }, + { + "text": "Encouraging most hidden units to have zero activation", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Autoencoders: The Kullback-Leibler (KL) divergence in sparse autoencoders is used to:", + "answers": [ + { + "text": "Minimise the reconstruction error", + "image": "" + }, + { + "text": "Match the average activation of hidden units to a target sparsity value", + "image": "" + }, + { + "text": "Maximise the number of active hidden units", + "image": "" + }, + { + "text": "Increase the dimensionality of the hidden layer", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Autoencoders: What are the weights in the context of face representation using eigenfaces?", + "answers": [ + { + "text": "The pixels that represent the image", + "image": "" + }, + { + "text": "The coefficients found by projecting the original face onto the eigenfaces", + "image": "" + }, + { + "text": "The eigenvectors of the faces", + "image": "" + }, + { + "text": "The eigenfaces themselves", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: According to the source, the joint probability of observed data 'x' and latent variable 'z' in a VAE is expressed as:", + "answers": [ + { + "text": "p(x,z) = p(x|z) + p(z)", + "image": "" + }, + { + "text": "p(x,z) = p(x|z)p(z)", + "image": "" + }, + { + "text": "p(x,z) = p(z|x)p(x)", + "image": "" + }, + { + "text": "p(x,z) = p(x) / p(z)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The term 'p(x|z)' in the VAE framework represents:", + 
"answers": [ + { + "text": "The prior distribution of the latent variable.", + "image": "" + }, + { + "text": "The likelihood of observing 'x' given a specific value of 'z'.", + "image": "" + }, + { + "text": "The posterior distribution of 'z' given 'x'.", + "image": "" + }, + { + "text": "The marginal distribution of 'x'.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: In the VAE model, the probability of the observed data 'x' is obtained by:", + "answers": [ + { + "text": "Maximizing p(x|z) over all possible values of 'z'.", + "image": "" + }, + { + "text": "Integrating p(x|z)p(z) over all possible values of 'z'.", + "image": "" + }, + { + "text": "Calculating the product of p(x|z) and p(z).", + "image": "" + }, + { + "text": "Minimizing the squared difference between 'x' and its reconstruction.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: According to the source, the encoder in a VAE aims to approximate:", + "answers": [ + { + "text": "p(x|z)", + "image": "" + }, + { + "text": "p(z|x)", + "image": "" + }, + { + "text": "p(z)", + "image": "" + }, + { + "text": "p(x)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The decoder network in a VAE is parameterised by:", + "answers": [ + { + "text": "q(z|x)", + "image": "" + }, + { + "text": "p(x|z)", + "image": "" + }, + { + "text": "p(z)", + "image": "" + }, + { + "text": "p(x)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The encoder in the VAE is denoted as:", + "answers": [ + { + "text": "p(x|z)", + "image": "" + }, + { + "text": "q(z|x)", + "image": "" + }, + { + "text": "p(z)", + "image": "" + }, + { + "text": "p(x)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: In a VAE, the Evidence Lower Bound (ELBO) is introduced as a:", + "answers": [ + { + "text": "Direct computation of the marginal likelihood p(x).", + "image": "" + }, + { + "text": "Tractable approximation to the log-likelihood of the observed data.", + "image": "" + }, + { + "text": "A method to calculate the exact posterior p(z|x).", + "image": "" + }, + { + "text": "A way to eliminate the need for integration over the latent space.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The ELBO is represented by the expression:", + "answers": [ + { + "text": "E[log p(x|z)] + D_KL(q(z|x)||p(z))", + "image": "" + }, + { + "text": "E[log p(x|z)] - D_KL(q(z|x)||p(z))", + "image": "" + }, + { + "text": "E[log q(z|x)] - D_KL(p(z|x)||q(z))", + "image": "" + }, + { + "text": "E[log p(z|x)] + D_KL(p(z|x)||p(z))", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The first term in the ELBO, E[log p(x|z)], represents:", + "answers": [ + { + "text": "The regularization term of the model.", + "image": "" + }, + { + "text": "The distribution of the latent variables", + "image": "" + }, + { + "text": "The reconstruction accuracy of the decoder.", + "image": "" + }, + { + "text": "The deviation of the posterior from the prior.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "VAE: The second term in the ELBO, D_KL(q(z|x)||p(z)), represents:", + "answers": [ + { + "text": "The Kullback-Leibler divergence between the approximate posterior and the prior.", + "image": "" + }, + { + "text": "The cross-entropy between the input and output.", + "image": "" + }, + { + "text": "The variance of the latent space.", + "image": "" + }, + { + "text": "The 
reconstruction accuracy of the encoder.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "VAE: What is the purpose of the KL divergence term in the VAE loss function?", + "answers": [ + { + "text": "To increase the reconstruction error.", + "image": "" + }, + { + "text": "To force the approximate posterior distribution of the latent variables to be close to the prior distribution.", + "image": "" + }, + { + "text": "To make the latent variables deterministic.", + "image": "" + }, + { + "text": "To reduce the computational complexity.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The source mentions that if q(z|x) is equal to p(z) then:", + "answers": [ + { + "text": "The model will underfit the data", + "image": "" + }, + { + "text": "The ELBO will be equal to log(p(x))", + "image": "" + }, + { + "text": "The reconstruction of x will have a lot of noise", + "image": "" + }, + { + "text": "The KL-divergence term will be infinite", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "VAE: According to the source, the approximate posterior q(z|x) is often assumed to be a:", + "answers": [ + { + "text": "Uniform distribution", + "image": "" + }, + { + "text": "Bernoulli distribution", + "image": "" + }, + { + "text": "Gaussian distribution", + "image": "" + }, + { + "text": "Poisson distribution", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "VAE: In a VAE, if we assume that q(z|x) is a Gaussian distribution, then what are the two outputs of the encoder network?", + "answers": [ + { + "text": "The mean and the variance of p(x|z)", + "image": "" + }, + { + "text": "The mean and the variance of p(z)", + "image": "" + }, + { + "text": "The mean and the variance of q(z|x)", + "image": "" + }, + { + "text": "The reconstruction and the original input", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "VAE: The reparameterisation trick in VAEs allows us to:", + "answers": [ + { + "text": "Calculate the reconstruction error without using samples from the latent distribution.", + "image": "" + }, + { + "text": "Backpropagate through the sampling process of the latent variable z.", + "image": "" + }, + { + "text": "Directly use the mean and variance of q(z|x) during training.", + "image": "" + }, + { + "text": "Calculate the KL divergence without the need for approximations.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: The reparameterisation trick expresses the latent variable ‘z’ as:", + "answers": [ + { + "text": "z = μ + σ", + "image": "" + }, + { + "text": "z = μ + σ ⊙ ε, where ε ~ N(0,I)", + "image": "" + }, + { + "text": "z = μ * σ * ε", + "image": "" + }, + { + "text": "z = μ / σ + ε", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "VAE: In the reparameterisation trick the random variable ε is drawn from:", + "answers": [ + { + "text": "A uniform distribution", + "image": "" + }, + { + "text": "A standard normal distribution", + "image": "" + }, + { + "text": "The posterior distribution q(z|x)", + "image": "" + }, + { + "text": "The prior distribution p(z)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: What are the two key requirements for probabilities when using the Softmax classifier?", + "answers": [ + { + "text": "They must be less than zero and sum to one.", + "image": "" + }, + { + "text": "They must be greater than or equal to zero and not sum to 
one.", + "image": "" + }, + { + "text": "They must be greater than or equal to zero and sum to greater than one.", + "image": "" + }, + { + "text": "They must be greater than or equal to zero and sum to one", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: In the context of training a classifier, what does Maximum Likelihood Estimation aim to do?", + "answers": [ + { + "text": "To minimise the likelihood of the observed data.", + "image": "" + }, + { + "text": "To choose weights to maximise the likelihood of the observed data.", + "image": "" + }, + { + "text": "To calculate the cross-entropy loss.", + "image": "" + }, + { + "text": "To regularise the model.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: Cross-entropy is described as the sum of two components. Which of the following correctly identifies these components?", + "answers": [ + { + "text": "Entropy and regularisation.", + "image": "" + }, + { + "text": "Entropy and model complexity.", + "image": "" + }, + { + "text": "Entropy and KL-divergence.", + "image": "" + }, + { + "text": "KL-divergence and regularisation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What is the primary effect of L2 regularization on the weights of a model?", + "answers": [ + { + "text": "It increases the magnitude of the weights.", + "image": "" + }, + { + "text": "It \"spreads out\" the weights.", + "image": "" + }, + { + "text": "It makes the model more complex.", + "image": "" + }, + { + "text": "It has no effect on the weights.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: When using gradient descent to find the best weights (W), what is the relationship between the data loss and the regularization term?", + "answers": [ + { + "text": "They are independent of each other", + "image": "" + }, + { + "text": "They are used separately, data loss first then regularization", + "image": "" + }, + { + "text": "Data loss is applied only if the regularization loss is too high", + "image": "" + }, + { + "text": "They are combined.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: Why is deriving gradients on paper considered a bad idea for complex models?", + "answers": [ + { + "text": "It's too simple for non-linear functions", + "image": "" + }, + { + "text": "It is very tedious, requires lots of matrix calculus and needs to be re-derived if loss changes.", + "image": "" + }, + { + "text": "It only works for linear score functions.", + "image": "" + }, + { + "text": "It is not possible.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: In the context of backpropagation, what are the two gradients that are multiplied using the chain rule?", + "answers": [ + { + "text": "Upstream and downstream gradients", + "image": "" + }, + { + "text": "Upstream and local gradients.", + "image": "" + }, + { + "text": "Input and output gradients", + "image": "" + }, + { + "text": "Weight and bias gradients", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Backpropagation: In a computational graph, what is the behavior of the \"add gate\" regarding gradients?", + "answers": [ + { + "text": "It swaps multipliers.", + "image": "" + }, + { + "text": "It adds gradients.", + "image": "" + }, + { + "text": "It distributes gradients.", + "image": "" + }, + { + "text": "It routes gradients.", + 
"image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In a computational graph, what is the behavior of the \"mul gate\" regarding gradients?", + "answers": [ + { + "text": "It adds gradients.", + "image": "" + }, + { + "text": "It distributes gradients", + "image": "" + }, + { + "text": "It \"swaps multiplier\".", + "image": "" + }, + { + "text": "It routes gradiens", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In a computational graph, what is the behavior of the \"copy gate\" regarding gradients?", + "answers": [ + { + "text": "It swaps multipliers.", + "image": "" + }, + { + "text": "It distributes gradients", + "image": "" + }, + { + "text": "It adds gradients.", + "image": "" + }, + { + "text": "It routes gradients.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In a computational graph, what is the behaviour of the \"max gate\" regarding gradients?", + "answers": [ + { + "text": "It swaps multipliers.", + "image": "" + }, + { + "text": "It distributes gradients", + "image": "" + }, + { + "text": "It adds gradients.", + "image": "" + }, + { + "text": "It routes gradients.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: In a modularized implementation of backpropagation, what is a key function of a gate/node/function object?", + "answers": [ + { + "text": "To only compute the result of an operation.", + "image": "" + }, + { + "text": "To only calculate upstream gradient", + "image": "" + }, + { + "text": "To cache some values for use in backward pass and multiply upstream and local gradients.", + "image": "" + }, + { + "text": "To apply the chain rule on forward pass.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: When dealing with vector derivatives, what is the derivative called when mapping from a vector to a scalar?", + "answers": [ + { + "text": "Jacobian", + "image": "" + }, + { + "text": "Hessian", + "image": "" + }, + { + "text": "Gradient", + "image": "" + }, + { + "text": "Laplacian", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: When dealing with vector derivatives, what is the derivative called when mapping from a vector to a vector?", + "answers": [ + { + "text": "Gradient", + "image": "" + }, + { + "text": "Hessian", + "image": "" + }, + { + "text": "Jacobian", + "image": "" + }, + { + "text": "Laplacian", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In backpropagation with vectors, what is the loss (L) considered to be?", + "answers": [ + { + "text": "A vector.", + "image": "" + }, + { + "text": "A matrix.", + "image": "" + }, + { + "text": "A tensor.", + "image": "" + }, + { + "text": "A scalar.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: In backpropagation with vectors, what is the relationship between the shape of dL/dx and x?", + "answers": [ + { + "text": "dL/dx is always larger than x.", + "image": "" + }, + { + "text": "dL/dx is always smaller than x.", + "image": "" + }, + { + "text": "dL/dx always has the same shape as x.", + "image": "" + }, + { + "text": "Their shapes are unrelated.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: When backpropagating through a matrix multiplication, what is the primary challenge in dealing with Jacobians?", + "answers": [ + { + 
"text": "They are difficult to compute", + "image": "" + }, + { + "text": "They are always sparse", + "image": "" + }, + { + "text": "They are always dense", + "image": "" + }, + { + "text": "They take too much memory, so we must work with them implicitly", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: When performing backpropagation with matrices, what does an element of X affect in the output Y (considerando che Y = X x W)?", + "answers": [ + { + "text": "Only one element of y.", + "image": "" + }, + { + "text": "Only one column of y.", + "image": "" + }, + { + "text": "The whole row of y.", + "image": "" + }, + { + "text": "It doesn't affect y.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What is the core principle of backpropagation?", + "answers": [ + { + "text": "To optimise the loss function directly", + "image": "" + }, + { + "text": "To compute gradients on forward pass", + "image": "" + }, + { + "text": "To recursively apply the chain rule along a computational graph to compute the gradients of all inputs/parameters/intermediates", + "image": "" + }, + { + "text": "To implement forward pass without saving any intermediates", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What are the two essential methods that nodes implement in a modularized backpropagation system?", + "answers": [ + { + "text": "Train() and Predict()", + "image": "" + }, + { + "text": "Input() and Output()", + "image": "" + }, + { + "text": "Loss() and Regularisation()", + "image": "" + }, + { + "text": "Forward() and Backward()", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: In a modularized backpropagation system, what is the purpose of the forward() method of a node?", + "answers": [ + { + "text": "To apply the chain rule", + "image": "" + }, + { + "text": "To compute gradients", + "image": "" + }, + { + "text": "To compute the result of an operation and save any intermediates needed for gradient computation.", + "image": "" + }, + { + "text": "To update parameters.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In a modularized backpropagation system, what is the purpose of the backward() method of a node?", + "answers": [ + { + "text": "To compute the result of an operation", + "image": "" + }, + { + "text": "To save intermediates", + "image": "" + }, + { + "text": "To update parameters", + "image": "" + }, + { + "text": "To apply the chain rule to compute the gradient of the loss function with respect to the inputs", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What is the primary difference between a linear classifier and a two-layer neural network?", + "answers": [ + { + "text": "A neural network does not include a linear score function.", + "image": "" + }, + { + "text": "A neural network has a simpler architecture.", + "image": "" + }, + { + "text": "A neural network is linear.", + "image": "" + }, + { + "text": "A neural network introduces a non-linear transformation with an activation function.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: According to the sources, what is the consequence of building a neural network without an activation function?", + "answers": [ + { + "text": "It becomes a more powerful non-linear classifier", + "image": "" + }, + { + "text": "It becomes 
computationally intractable", + "image": "" + }, + { + "text": "It ends up being a linear classifier", + "image": "" + }, + { + "text": "It cannot learn anything", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What does the Universal Approximation Theorem state in the context of Neural Networks?", + "answers": [ + { + "text": "Neural networks always find a global minimum", + "image": "" + }, + { + "text": "Neural networks always learn a linear function", + "image": "" + }, + { + "text": "A sufficiently large neural network can approximate any discontinuous function", + "image": "" + }, + { + "text": "A sufficiently large neural network can approximate any continuous function", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What does the source say about training of Multi-Layer Perceptrons (MLPs)?", + "answers": [ + { + "text": "It is convex.", + "image": "" + }, + { + "text": "It is generally easy.", + "image": "" + }, + { + "text": "It is always optimal.", + "image": "" + }, + { + "text": "It is highly non-convex, with multiple local minima.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What does the source state is a good default choice for an activation function?", + "answers": [ + { + "text": "Sigmoid", + "image": "" + }, + { + "text": "tanh", + "image": "" + }, + { + "text": "ReLU", + "image": "" + }, + { + "text": "ELU", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: What does the source say about using the size of a neural network as a regularizer?", + "answers": [ + { + "text": "It is the best approach to regularize a network", + "image": "" + }, + { + "text": "It is better to use implicit regularization", + "image": "" + }, + { + "text": "It is always possible", + "image": "" + }, + { + "text": "It is not a good idea; stronger regularization methods are preferred", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What are the key factors contributing to the advancements in deep learning, according to the provided text?", + "answers": [ + { + "text": "Only massive parallel compute power", + "image": "" + }, + { + "text": "Only availability of large datasets", + "image": "" + }, + { + "text": "Only advances in machine learning over the years", + "image": "" + }, + { + "text": "Availability of large datasets, massive parallel compute power, and advances in machine learning", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: According to the sources, what is a limitation of hand-crafted features used in traditional machine learning?", + "answers": [ + { + "text": "They are very efficient to compute", + "image": "" + }, + { + "text": "They are very efficient to train", + "image": "" + }, + { + "text": "They are often task specific", + "image": "" + }, + { + "text": "They might be too general or too specific", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What is a key characteristic of features in deep learning, in contrast to traditional approaches?", + "answers": [ + { + "text": "They are fixed", + "image": "" + }, + { + "text": "They are hand-crafted", + "image": "" + }, + { + "text": "They are trainable (parameterized)", + "image": "" + }, + { + "text": "They are non-differentiable", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Backpropagation: In 
deep learning, what does \"end-to-end\" training refer to?", + "answers": [ + { + "text": "The separation of feature extraction and classification", + "image": "" + }, + { + "text": "Hand-crafting of features", + "image": "" + }, + { + "text": "Training each layer of a network separately", + "image": "" + }, + { + "text": "The joint training of feature extraction and classification as a single pipeline", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Backpropagation: What does the source emphasize about how complex systems in deep learning are built?", + "answers": [ + { + "text": "They use extremely complicated individual blocks", + "image": "" + }, + { + "text": "They are built by hand-crafting the individual components", + "image": "" + }, + { + "text": "They require an extraordinary amount of data", + "image": "" + }, + { + "text": "They are built via composition of simple building blocks", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest":"ConvNets: In the context of Convolutional Neural Networks, a fully connected layer applied to a 32x32x3 image involves stretching the image into a 3072x1 vector. What is the primary drawback of this approach?", + "answers": [ + { + "text": "It increases the computational complexity of the network.", + "image": "" + }, + { + "text": "It destroys the spatial structure of the image.", + "image": "" + }, + { + "text": "It requires more memory than convolutional layers.", + "image": "" + }, + { + "text": "It is difficult to implement.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: A convolutional layer processes a 32x32x3 image using a 5x5x3 filter. What is produced by sliding this single filter over the entire image?", + "answers": [ + { + "text": "A 32x32x1 activation map.", + "image": "" + }, + { + "text": "A 28x28x1 activation map.", + "image": "" + }, + { + "text": "A single number, representing a dot product of a 5x5x3 chunk of the input with the filter, plus bias", + "image": "" + }, + { + "text": "A 5x5x3 feature map.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: In a convolutional layer, if you have multiple filters, say six filters, what is the result of applying those filters to a single input image?", + "answers": [ + { + "text": "A single activation map with increased depth.", + "image": "" + }, + { + "text": "Six activation maps which are then stacked up to get a new image with a different depth.", + "image": "" + }, + { + "text": "Six activation maps that are averaged to form a single map.", + "image": "" + }, + { + "text": "Six separate images of the same spatial dimension as the input.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: What does it mean for a convolutional filter to \"slide\" over the image during the convolution operation?", + "answers": [ + { + "text": "The filter moves across the image, changing its weights at each position.", + "image": "" + }, + { + "text": "The filter is applied to different channels of the input volume sequentially.", + "image": "" + }, + { + "text": "The filter computes dot products with small overlapping patches of the image at each location, resulting in an activation map", + "image": "" + }, + { + "text": "The filter moves in a predetermined pattern, similar to a pooling operation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest":"ConvNets: What is a key difference between a convolutional layer and a
fully connected layer in terms of how they process spatial information?", + "answers": [ + { + "text": "A convolutional layer destroys spatial information, whereas a fully connected layer preserves it.", + "image": "" + }, + { + "text": "A fully connected layer performs dot products between an input vector and a row of weights, while a convolutional layer does not", + "image": "" + }, + { + "text": "A convolutional layer preserves the spatial structure of the input, whereas a fully connected layer stretches the input into a vector.", + "image": "" + }, + { + "text": "A fully connected layer uses filters, whereas convolutional layers do not.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest":"ConvNets: What is the consequence of having 6 separate 5x5 filters in a convolutional layer that acts on a 32x32x3 input?", + "answers": [ + { + "text": "A single 28x28x6 activation map will be obtained.", + "image": "" + }, + { + "text": "Six 28x28x1 activation maps are obtained which are then stacked to get a 28x28x6 \"new image\".", + "image": "" + }, + { + "text": "A 28x28x3 activation map is obtained.", + "image": "" + }, + { + "text": "Six different 32x32x3 images are obtained.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: What are the four hyperparameters that a convolutional layer needs?", + "answers": [ + { + "text": "Filter size, stride, number of pooling layers, and number of fully connected layers", + "image": "" + }, + { + "text": "Filter size, stride, padding, and number of pooling layers.", + "image": "" + }, + { + "text": "Number of filters, the filter size, the stride and the zero padding", + "image": "" + }, + { + "text": "Input size, filter size, stride, and number of output channels.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest":"ConvNets: What are the two primary functions of a pooling layer in a CNN?", + "answers": [ + { + "text": "It adds learnable parameters and introduces spatial variance.", + "image": "" + }, + { + "text": "It reduces the size of the representation and introduces spatial invariance.", + "image": "" + }, + { + "text": "It increases the depth of the feature maps and makes the network deeper", + "image": "" + }, + { + "text": "It adds non-linearity and performs non-linear combinations of features.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: How does a max pooling layer with 2x2 filters and a stride of 2 operate?", + "answers": [ + { + "text": "It averages the values in each 2x2 region.", + "image": "" + }, + { + "text": "It takes the maximum value in each 2x2 region.", + "image": "" + }, + { + "text": "It multiplies the values in each 2x2 region by a scalar", + "image": "" + }, + { + "text": "It applies a learnable function to each 2x2 region.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest":"ConvNets: What is a trend in recent CNN architectures regarding pooling and fully connected layers?", + "answers": [ + { + "text": "A trend towards more pooling layers and larger fully connected layers", + "image": "" + }, + { + "text": "A trend towards larger filters and wider architectures", + "image": "" + }, + { + "text": "A trend towards smaller filters and deeper architectures", + "image": "" + }, + { + "text": "A trend towards getting rid of pooling and fully connected layers (just CONV layers)", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest":"ConvNets: According to the source, what is a 
typical structure of CNN architectures historically, before recent advancements like ResNet/GoogLeNet challenged it?", + "answers": [ + { + "text": "A sequence of convolutional layers followed by a single fully connected layer", + "image": "" + }, + { + "text": "A sequence of pooling layers followed by a sequence of convolutional layers", + "image": "" + }, + { + "text": "A repeating pattern of (CONV-RELU)×N followed by an optional POOL, repeated M times, followed by (FC-RELU)×K, and a final SOFTMAX activation.", + "image": "" + }, + { + "text": "A sequence of fully connected layers followed by a sequence of pooling layers.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: During the training phase, how does dropout modify the activation of a neuron with a dropout rate of p?", + "answers": [ + { + "text": "The neuron's activation is scaled by a factor of p.", + "image": "" + }, + { + "text": "The neuron's activation is multiplied by 1-p.", + "image": "" + }, + { + "text": "The neuron's activation is set to zero with a probability of p, otherwise its activation is preserved.", + "image": "" + }, + { + "text": "The neuron's activation is always set to zero.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: During inference (testing or validation), what adjustment is typically made to neuron activations in a network that uses dropout?", + "answers": [ + { + "text": "No adjustments are made; neurons are used as they are.", + "image": "" + }, + { + "text": "Neuron activations are multiplied by the dropout rate p.", + "image": "" + }, + { + "text": "Neuron activations are scaled down by multiplying by (1-p).", + "image": "" + }, + { + "text": "Neuron activations are set to zero.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the purpose of scaling the activations by 1-p during inference when using dropout?", + "answers": [ + { + "text": "To increase the magnitude of neuron activations.", + "image": "" + }, + { + "text": "To compensate for the fact that fewer neurons were active during training.", + "image": "" + }, + { + "text": "To introduce more randomness during the inference phase.", + "image": "" + }, + { + "text": "To ensure the network learns different features during inference.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: In the \"inverse dropout\" formulation, when is the scaling applied to the activations, and what is the key benefit?", + "answers": [ + { + "text": "Scaling is applied during inference, ensuring faster inference.", + "image": "" + }, + { + "text": "Scaling is applied before training, resulting in faster convergence.", + "image": "" + }, + { + "text": "Scaling is applied during training, ensuring the expected value of the activations remains consistent between training and inference.", + "image": "" + }, + { + "text": "Scaling is applied after the backpropagation, for better generalization.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the \"vanishing gradient\" problem in deep neural networks?", + "answers": [ + { + "text": "A situation where gradients become very large, causing instability in training.", + "image": "" + }, + { + "text": "The tendency of neurons to deactivate randomly during training.", + "image": "" + }, + { + "text": "A phenomenon where gradients become increasingly small as they propagate backward, making training difficult.", + "image": "" + }, + { + "text":
"A problem that only occurs in shallow networks.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the main purpose of residual connections (skip connections) in ResNets?", + "answers": [ + { + "text": "To reduce the number of layers needed in a network.", + "image": "" + }, + { + "text": "To add more non-linearity to the network.", + "image": "" + }, + { + "text": "To address the vanishing gradient problem by allowing gradients to flow more easily through the network.", + "image": "" + }, + { + "text": "To speed up training by reducing the number of computations.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: How does a residual connection work mathematically?", + "answers": [ + { + "text": "It replaces the layer's output with the original input.", + "image": "" + }, + { + "text": "It multiplies the layer's output by the input.", + "image": "" + }, + { + "text": "It adds the layer's input directly to its output h = F(x) + x.", + "image": "" + }, + { + "text": "It subtracts the input from the layer's output.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What happens if the parameters in a residual unit are set such that F(x) = 0?", + "answers": [ + { + "text": "The residual unit outputs a zero vector.", + "image": "" + }, + { + "text": "The residual unit's output becomes exponentially large.", + "image": "" + }, + { + "text": "The residual unit passes the input x through unmodified.", + "image": "" + }, + { + "text": "The unit passes through a zero vector.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the key advantage of residual connections in backpropagation?", + "answers": [ + { + "text": "It simplifies the backpropagation process.", + "image": "" + }, + { + "text": "It ensures that the gradients vanish more quickly.", + "image": "" + }, + { + "text": "It means the derivatives don't vanish as ∇ₓ h = ∇ₓ (F(x) + x) = ∂F / ∂x + I.", + "image": "" + }, + { + "text": "It forces the network to learn different features for every layer.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What was the impact of Residual Networks (ResNets) on image classification performance on ImageNet?", + "answers": [ + { + "text": "ResNets reduced the performance of image classification.", + "image": "" + }, + { + "text": "ResNets achieved similar results to previous state-of-the-art models.", + "image": "" + }, + { + "text": "ResNets achieved significantly lower error rates than previous models and even human performance, with a 152-layer ResNet achieving 4.49% top-5 error.", + "image": "" + }, + { + "text": "ResNets could only be trained with a limited number of layers", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What does \"standard scaling\" aim to achieve when preprocessing data?", + "answers": [ + { + "text": "It ensures that each feature has a different mean.", + "image": "" + }, + { + "text": "It ensures each feature (column) has zero mean and unit variance.", + "image": "" + }, + { + "text": "It scales the data between 0 and 1.", + "image": "" + }, + { + "text": "It increases the variance of each feature.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: In standard scaling, what do μ and σ² represent?", + "answers": [ + { + "text": "μ is the sum and σ² is the variance of each feature/column.", + "image": "" + }, + { + "text": "μ is the 
median and σ² is the standard deviation of each feature/column.", + "image": "" + }, + { + "text": "μ is the mean and σ² is the variance of each feature/column.", + "image": "" + }, + { + "text": "μ is the mean and σ² is the standard deviation of each feature/column.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the main goal of Batch Normalization (BN) in neural networks?", + "answers": [ + { + "text": "To normalize the input data before training.", + "image": "" + }, + { + "text": "To learn an optimal mean and variance for each unit of the network's layers during training.", + "image": "" + }, + { + "text": "To reduce the number of parameters in the network.", + "image": "" + }, + { + "text": "To simplify backpropagation.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: How does Batch Normalization (BN) approximate the mean and variance?", + "answers": [ + { + "text": "By calculating statistics over the entire dataset during each training step.", + "image": "" + }, + { + "text": "By using the data in a mini-batch.", + "image": "" + }, + { + "text": "By using a pre-defined set of values.", + "image": "" + }, + { + "text": "By using the moving average of the previous layer.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: During BN training, how is the output H standardized, and what does ε do?", + "answers": [ + { + "text": "H is standardized by subtracting its mean, and ε adds a small value to prevent division by zero.", + "image": "" + }, + { + "text": "H is standardized by dividing by its variance, and ε increases the variance.", + "image": "" + }, + { + "text": "H is standardized by multiplying by its standard deviation, and ε reduces the mean.", + "image": "" + }, + { + "text": "H is standardized by adding its mean, and ε adjusts the variance.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "MoreNN: During BN training, what trainable parameters are introduced to set a new mean and variance for each column j?", + "answers": [ + { + "text": "Two scalars, α and β.", + "image": "" + }, + { + "text": "Two vectors, μ and σ.", + "image": "" + }, + { + "text": "Two values per column, αj and βj.", + "image": "" + }, + { + "text": "Two matrices, W and b.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What are two common solutions used during inference to avoid the output depending on the mini-batch when using BN?", + "answers": [ + { + "text": "Training with larger batch sizes and adjusting learning rates.", + "image": "" + }, + { + "text": "Applying a different set of trainable parameters and using dropout.", + "image": "" + }, + { + "text": "Post-training statistics calculation and moving average of statistics.", + "image": "" + }, + { + "text": "Re-training with the entire dataset and applying a different activation function.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: How does Batch Normalization (BN) work with convolutional outputs that have dimensions (b, h, w, c)?", + "answers": [ + { + "text": "It normalizes across all dimensions, including the batch size.", + "image": "" + }, + { + "text": "The mean and variance are computed per channel, normalizing independently across spatial dimensions and batch.", + "image": "" + }, + { + "text": "It normalizes only across the batch size.", + "image": "" + }, + { + "text": "It normalizes across spatial
dimensions only.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is a limitation of Batch Normalization when using small batch sizes?", + "answers": [ + { + "text": "Batch Normalization becomes more accurate with small batch sizes.", + "image": "" + }, + { + "text": "The variance in the computed mean and variance estimates can become excessively high, leading to unstable training.", + "image": "" + }, + { + "text": "It makes the network simpler to train.", + "image": "" + }, + { + "text": "It decreases the computational overhead.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: How does Layer Normalization differ from Batch Normalization?", + "answers": [ + { + "text": "Layer Normalization normalizes across the batch, while Batch Normalization normalizes across features.", + "image": "" + }, + { + "text": "Layer Normalization normalizes across features, while Batch Normalization normalizes across the mini-batch.", + "image": "" + }, + { + "text": "Layer Normalization introduces learnable parameters, whereas Batch Normalization does not.", + "image": "" + }, + { + "text": "Layer Normalization does not require the calculation of mean and variance.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: In what type of networks is Layer Normalization commonly used?", + "answers": [ + { + "text": "CNNs for image classification.", + "image": "" + }, + { + "text": "Forecasting neural networks working with time series and transformers.", + "image": "" + }, + { + "text": "Generative adversarial networks (GANs).", + "image": "" + }, + { + "text": "Recurrent neural networks (RNNs) for language modeling.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is the primary goal of data augmentation?", + "answers": [ + { + "text": "To reduce the size of the training dataset.", + "image": "" + }, + { + "text": "To decrease the complexity of the training data.", + "image": "" + }, + { + "text": "To increase the size of the training dataset effectively by applying random transformations.", + "image": "" + }, + { + "text": "To make the training process faster by using simpler examples.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What are the typical transformations used in data augmentation?", + "answers": [ + { + "text": "Only geometric transformations like flipping, cropping, and rotating.", + "image": "" + }, + { + "text": "Only color and lighting adjustments like brightness, contrast and saturation.", + "image": "" + }, + { + "text": "A combination of geometric transformations, color and lighting adjustments, noise and distortion, cutout/masking, and combination techniques.", + "image": "" + }, + { + "text": "Only noise addition", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the main purpose of using 1D convolutions for time-series data?", + "answers": [ + { + "text": "To extract global features or patterns that evolve over time.", + "image": "" + }, + { + "text": "To extract local features or patterns that evolve over time.", + "image": "" + }, + { + "text": "To make the time-series data stationary.", + "image": "" + }, + { + "text": "To increase the dimensionality of the input.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: How does a 1D convolution capture local dependencies in time series data?", + "answers": [ + { + "text": "By averaging all 
time steps.", + "image": "" + }, + { + "text": "By applying a filter of a fixed size that slides over the time series, detecting trends or repeated patterns.", + "image": "" + }, + { + "text": "By only considering the first and last time steps.", + "image": "" + }, + { + "text": "By considering the entire sequence at once.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What does it mean for a 1D convolution to have parameter sharing?", + "answers": [ + { + "text": "The parameters change over time, allowing to learn specific behaviour for certain time-steps.", + "image": "" + }, + { + "text": "The same filter is applied across all time steps, reducing the number of parameters and improving generalization.", + "image": "" + }, + { + "text": "Different filters are applied to different time steps.", + "image": "" + }, + { + "text": "The parameters are only used for a specific subset of the input.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What does translation invariance mean in the context of 1D convolutions?", + "answers": [ + { + "text": "The model is sensitive to shifts in the time domain.", + "image": "" + }, + { + "text": "The model does not consider the time order of the input.", + "image": "" + }, + { + "text": "It helps in identifying features that are present at different time steps, making it robust to shifts in the time domain.", + "image": "" + }, + { + "text": "It means the model can only detect patterns at a fixed time step.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the key characteristic of a causal convolution?", + "answers": [ + { + "text": "The output at each time step depends on future time steps.", + "image": "" + }, + { + "text": "The output at each time step depends only on the current and previous time steps.", + "image": "" + }, + { + "text": "The output at each time step is independent of other time steps.", + "image": "" + }, + { + "text": "The output at each time step is influenced by future and past time steps", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: In the context of time series forecasting with a causal model, what is one way to train a model?", + "answers": [ + { + "text": "Pool the output representation H over all time steps and apply a regressor head to predict xn.", + "image": "" + }, + { + "text": "Use a non-causal model to train.", + "image": "" + }, + { + "text": "Pool only the first few steps in the time series.", + "image": "" + }, + { + "text": "Disregard the time dependencies between the series.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "MoreNN: What is the difference between how 1D convolution and self-attention model sequences?", + "answers": [ + { + "text": "1D convolution captures global dependencies, while self-attention captures local dependencies.", + "image": "" + }, + { + "text": "1D convolution has quadratic complexity, while self-attention is more efficient.", + "image": "" + }, + { + "text": "1D convolution captures local patterns using a sliding filter, while self-attention computes interactions between all elements.", + "image": "" + }, + { + "text": "1D convolution uses weights for each pair of inputs, while self-attention shares weights.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: In self-attention mechanisms, what are the three transformed vectors derived from each token's embedding?", + 
"answers": [ + { + "text": "Input, output, and hidden vectors.", + "image": "" + }, + { + "text": "Weight, bias, and activation vectors.", + "image": "" + }, + { + "text": "Query, Key, and Value vectors.", + "image": "" + }, + { + "text": "Gradient, loss, and prediction vectors.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: In the analogy with a web search, what corresponds to the \"Query\" vector in self-attention?", + "answers": [ + { + "text": "The titles of web pages.", + "image": "" + }, + { + "text": "The content of web pages.", + "image": "" + }, + { + "text": "The search term you type—what you're looking for.", + "image": "" + }, + { + "text": "The search engine algorithm.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the role of \"masking\" in masked self-attention?", + "answers": [ + { + "text": "To amplify the attention scores of future tokens.", + "image": "" + }, + { + "text": "To ensure the model only focuses on past tokens when predicting the next token.", + "image": "" + }, + { + "text": "To randomize the attention scores to avoid bias.", + "image": "" + }, + { + "text": "To ignore the past tokens and focus only on future tokens.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is the purpose of passing the concatenated outputs of multiple self-attention heads through a Multilayer Perceptron (MLP) layer?", + "answers": [ + { + "text": "To reduce the dimensionality of the output.", + "image": "" + }, + { + "text": "To enhance the model's representational capacity after capturing diverse relationships.", + "image": "" + }, + { + "text": "To compute attention scores.", + "image": "" + }, + { + "text": "To apply positional embeddings.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is the \"Add\" operation in a Transformer block?", + "answers": [ + { + "text": "A fully connected layer", + "image": "" + }, + { + "text": "A pooling layer", + "image": "" + }, + { + "text": "A residual connection", + "image": "" + }, + { + "text": "A layer normalization", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the \"Norm\" operation in a Transformer block?", + "answers": [ + { + "text": "A fully connected layer.", + "image": "" + }, + { + "text": "A residual connection", + "image": "" + }, + { + "text": "A Batch Normalization", + "image": "" + }, + { + "text": "A Layer Normalization", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "MoreNN: What is the first step in representing text as input for a transformer?", + "answers": [ + { + "text": "Applying a softmax function to the input text.", + "image": "" + }, + { + "text": "Dividing text into tokens and converting them into numerical vectors called embeddings.", + "image": "" + }, + { + "text": "Normalizing the text using a standard scaler.", + "image": "" + }, + { + "text": "Applying data augmentation techniques to the text.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What are the main issues with word encoders?", + "answers": [ + { + "text": "They are difficult to train and implement.", + "image": "" + }, + { + "text": "They require huge computational power to represent words.", + "image": "" + }, + { + "text": "They need to detect boundaries of words and treat different forms of the same word as separate types.", + "image": "" + }, + { + "text": "They don't capture the 
semantic meaning of the words.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What are the characteristics of character encoders?", + "answers": [ + { + "text": "They increase the complexity of the model and are easy to use.", + "image": "" + }, + { + "text": "They reduce the complexity but are almost impossible to use.", + "image": "" + }, + { + "text": "They make the model more robust and efficient.", + "image": "" + }, + { + "text": "They are ideal for most NLP tasks.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: What is the byte pair encoding (BPE) algorithm used for?", + "answers": [ + { + "text": "To represent each word as a single byte.", + "image": "" + }, + { + "text": "To represent each character in a text as an integer.", + "image": "" + }, + { + "text": "To create subword tokens by merging frequent character sequences.", + "image": "" + }, + { + "text": "To compress text data into smaller files.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the first step in the BPE algorithm?", + "answers": [ + { + "text": "Count the frequency of each character pair in the data", + "image": "" + }, + { + "text": "Merge the characters into one symbol.", + "image": "" + }, + { + "text": "Form a base vocabulary of all characters that occur in the training data.", + "image": "" + }, + { + "text": "Tokenize the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What is the purpose of positional encodings in Transformer models?", + "answers": [ + { + "text": "To reduce the dimensionality of the input.", + "image": "" + }, + { + "text": "To inject order into the model by embedding position-specific information.", + "image": "" + }, + { + "text": "To increase the variance of the input data.", + "image": "" + }, + { + "text": "To prevent the model from overfitting.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: Why are positional encodings needed in transformers?", + "answers": [ + { + "text": "Transformers do not need positional encodings because they can infer the order of the input.", + "image": "" + }, + { + "text": "Because Transformers process all tokens simultaneously, they need positional encodings to be aware of the sequence information.", + "image": "" + }, + { + "text": "Positional encodings are only needed for time-series data.", + "image": "" + }, + { + "text": "Because Transformers can easily capture the order of the sequence.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "MoreNN: How do relative positional embeddings work?", + "answers": [ + { + "text": "They add information about the absolute position of the tokens.", + "image": "" + }, + { + "text": "They only use static positional information.", + "image": "" + }, + { + "text": "They consider the relative distance between tokens instead of their absolute positions.", + "image": "" + }, + { + "text": "They only encode the first and last positions of the tokens.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "MoreNN: What are the outputs of the transformer block and what are they used for?", + "answers": [ + { + "text": "Logits, which are converted into probabilities of the next token, are used to select the most probable token or sample one.", + "image": "" + }, + { + "text": "Embeddings, which are used for classification tasks.", + "image": "" + }, + { + "text": "Attention scores, which are used 
for image generation.", + "image": "" + }, + { + "text": "Key and value matrices, used for backpropagation", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "MoreNN: What is the role of the \"temperature\" hyperparameter when generating text from a language model?", + "answers": [ + { + "text": "The \"temperature\" is used to adjust the learning rate of the model", + "image": "" + }, + { + "text": "It is used to adjust the size of the model", + "image": "" + }, + { + "text": "It is used to control the size of the vocabulary used by the model", + "image": "" + }, + { + "text": "It controls the randomness of the output by sharpening or softening the probability distribution.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "LR: What is the primary goal of the cost function in linear regression?", + "answers": [ + { + "text": "To maximize the difference between predicted and actual output values.", + "image": "" + }, + { + "text": "To identify the optimal number of features for a model.", + "image": "" + }, + { + "text": "To minimize the error between the predicted values and the actual target values.", + "image": "" + }, + { + "text": "To determine the correlation between input and output variables.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: In the context of gradient descent, which statement accurately describes the effect of the learning rate (α)?", + "answers": [ + { + "text": "A larger learning rate guarantees faster convergence to the global minimum.", + "image": "" + }, + { + "text": "A smaller learning rate might cause the gradient descent to diverge from the minimum.", + "image": "" + }, + { + "text": "If α is too small, gradient descent will be slow, and if α is too large, gradient descent might overshoot the minimum and even diverge.", + "image": "" + }, + { + "text": "The learning rate should automatically decrease over time as gradient descent approaches a local minimum.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: What is the key distinction between \"batch\" gradient descent and stochastic gradient descent (SGD)?", + "answers": [ + { + "text": "Batch gradient descent is an online method, while SGD is an offline method.", + "image": "" + }, + { + "text": "Batch gradient descent updates parameters after each training example, whereas SGD does it using all training examples.", + "image": "" + }, + { + "text": "Batch gradient descent calculates the gradient using all training examples in each iteration, while SGD uses only a single training example to update the gradient in each iteration.", + "image": "" + }, + { + "text": "SGD is slower and requires more iterations than batch gradient descent.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: Which of the following is NOT a method to calculate simple linear regression?", + "answers": [ + { + "text": "Gradient Descent", + "image": "" + }, + { + "text": "Normal equation", + "image": "" + }, + { + "text": "Principal Component Analysis", + "image": "" + }, + { + "text": "Software packages, e.g., NumPy polyfit", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: What is the purpose of feature scaling in linear regression, and how is mean normalization typically applied?", + "answers": [ + { + "text": "To increase the magnitude of the features and make the gradient descent faster.", + "image": "" + }, + { + "text": "Feature scaling ensures that the features 
are on a similar scale and mean normalization replaces the feature value with xᵢ − μᵢ, to have approximately zero mean.", + "image": "" + }, + { + "text": "To add random noise to the features in order to prevent overfitting.", + "image": "" + }, + { + "text": "Feature scaling is not required if we are using the normal equation.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: What does the normal equation provide in the context of linear regression?", + "answers": [ + { + "text": "An iterative approach to find the parameters that minimise the cost function.", + "image": "" + }, + { + "text": "A direct analytical method to compute the parameters (Θ) that minimise the cost function.", + "image": "" + }, + { + "text": "An alternative to gradient descent that is faster regardless of the number of features.", + "image": "" + }, + { + "text": "A way to determine the appropriate learning rate for gradient descent.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: When might the normal equation be computationally inefficient and what might be a workaround?", + "answers": [ + { + "text": "When the number of training examples is very high, one should use gradient descent instead", + "image": "" + }, + { + "text": "When the number of features is very high, and as a work-around, delete some features or use regularization", + "image": "" + }, + { + "text": "The normal equation is always computationally efficient, irrespective of the number of features", + "image": "" + }, + { + "text": "It is inefficient when there is correlation; therefore it requires an alternative method", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: According to the sources, what does a Pearson correlation coefficient (r) of -1 signify?", + "answers": [ + { + "text": "No correlation between the variables", + "image": "" + }, + { + "text": "A moderate positive correlation between the variables", + "image": "" + }, + { + "text": "A maximum negative correlation between the variables", + "image": "" + }, + { + "text": "A maximum positive correlation between the variables", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR: What does the coefficient of determination (R²) measure in regression analysis?", + "answers": [ + { + "text": "The correlation between variables.", + "image": "" + }, + { + "text": "The goodness-of-fit of a line or curve to the data points.", + "image": "" + }, + { + "text": "The slope of the regression line.", + "image": "" + }, + { + "text": "The complexity of the regression model.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: What is a key characteristic that distinguishes locally-weighted regression from linear regression?", + "answers": [ + { + "text": "Locally-weighted regression uses a fixed set of parameters", + "image": "" + }, + { + "text": "Locally-weighted regression parameters grow with the data, making it a non-parametric learning algorithm, whilst linear regression uses a fixed set of parameters.", + "image": "" + }, + { + "text": "Locally-weighted regression is faster than linear regression.", + "image": "" + }, + { + "text": "Locally weighted regression uses gradient descent.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR: According to the sources, what is the primary assumption underlying the probabilistic interpretation of least squares?", + "answers": [ + { + "text": "That the target value y is equal to 
Θᵀx plus some random error.", + "image": "" + }, + { + "text": "That the input features are normally distributed.", + "image": "" + }, + { + "text": "That the parameters Θ are fixed and known.", + "image": "" + }, + { + "text": "That the regression line has zero error.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "LR2: In the context of the normal equation, what does the term 'X' represent?", + "answers": [ + { + "text": "The vector of predicted values.", + "image": "" + }, + { + "text": "The matrix of target variables.", + "image": "" + }, + { + "text": "The matrix of input features.", + "image": "" + }, + { + "text": "The vector of errors.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: What does θ represent in the normal equation?", + "answers": [ + { + "text": "The error term", + "image": "" + }, + { + "text": "The predicted values", + "image": "" + }, + { + "text": "The parameter vector that we aim to find", + "image": "" + }, + { + "text": "The input features", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: Given the cost function J(θ) = 1/2 * ||Xθ − y||², which statement correctly describes how the normal equation is derived?", + "answers": [ + { + "text": "The gradient of J(θ) is set to a non-zero constant to minimise the cost function.", + "image": "" + }, + { + "text": "The cost function is directly minimised by setting its partial derivative to the identity matrix.", + "image": "" + }, + { + "text": "The gradient of J(θ) is set to zero to find the optimal parameter vector.", + "image": "" + }, + { + "text": "The cost function is minimised by setting the second derivative to zero.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: The normal equation solution θ = (XᵀX)⁻¹Xᵀy can be computed when:", + "answers": [ + { + "text": "XᵀX is a singular matrix.", + "image": "" + }, + { + "text": "X has more rows than columns", + "image": "" + }, + { + "text": "XᵀX is invertible", + "image": "" + }, + { + "text": "X has linearly dependent columns", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: What is the rank condition for XᵀX to be invertible?", + "answers": [ + { + "text": "X must have a rank equal to the number of rows.", + "image": "" + }, + { + "text": "X must be a symmetric matrix.", + "image": "" + }, + { + "text": "X must have linearly independent columns, i.e. rank equal to the number of features (m).", + "image": "" + }, + { + "text": "X must be a square matrix with a determinant of 1.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: In the equation for θ, θ = (XᵀX)⁻¹Xᵀy what does the (XᵀX)⁻¹ term represent?", + "answers": [ + { + "text": "The pseudo-inverse of the feature matrix", + "image": "" + }, + { + "text": "The transpose of the feature matrix", + "image": "" + }, + { + "text": "The inverse of the matrix product X transpose times X.", + "image": "" + }, + { + "text": "The dot product of X with itself.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: If XᵀX is not invertible, what can be inferred about the feature matrix X?", + "answers": [ + { + "text": "The feature matrix is not real", + "image": "" + }, + { + "text": "The feature matrix contains all zeros", + "image": "" + }, + { + "text": "The feature matrix has linearly dependent columns", + "image": "" + }, + { + "text": "The feature matrix contains no features", + "image": "" + } + ], + "correct": 2, + 
"image": "" + }, + { + "quest": "LR2: What is a key difference between linear regression and locally weighted regression (LWR)?", + "answers": [ + { + "text": "Linear regression uses a kernel function, while LWR does not.", + "image": "" + }, + { + "text": "LWR assigns weights to data points based on their proximity to the query point, while linear regression does not.", + "image": "" + }, + { + "text": "Linear regression uses a constant for error calculation while LWR does not.", + "image": "" + }, + { + "text": "LWR computes global parameters, while linear regression computes local parameters.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "LR2: In Locally Weighted Regression (LWR), what is the purpose of the kernel function?", + "answers": [ + { + "text": "To perform a linear transformation of the input data.", + "image": "" + }, + { + "text": "To reduce the dimensionality of the feature matrix.", + "image": "" + }, + { + "text": "To give higher weights to points closer to the query point and lower weights to points farther away.", + "image": "" + }, + { + "text": "To transform all data into a standard normal distribution.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: What does the term 'τ' (tau) represent in the context of the weighting function for Locally Weighted Regression (LWR)?", + "answers": [ + { + "text": "The weighting parameter", + "image": "" + }, + { + "text": "The inverse of the feature matrix", + "image": "" + }, + { + "text": "The variance of the data", + "image": "" + }, + { + "text": "The bandwidth parameter, controlling the width of the kernel.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "LR2: What is an advantage of locally weighted regression compared to standard linear regression?", + "answers": [ + { + "text": "LWR is always faster to compute than linear regression.", + "image": "" + }, + { + "text": "LWR is not affected by outliers.", + "image": "" + }, + { + "text": "LWR can model non-linear relationships between the input features and the target variable.", + "image": "" + }, + { + "text": "LWR always has a unique solution and no risk of overfitting.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "LR2: Based on the diagram in the source, which of the following can be described as 'overfitting'?", + "answers": [ + { + "text": "The model underfits the data.", + "image": "" + }, + { + "text": "The model perfectly fits all data points including the noise in the data.", + "image": "" + }, + { + "text": "The model has very high flexibility and thus it captures the random variations and noise in the training data, not generalising well to unseen data.", + "image": "" + }, + { + "text": "The model gives a completely inaccurate fit to the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: Given a binary classification scenario, which of the following is the correct interpretation of the notation 'y = 1'?", + "answers": [ + { + "text": "It represents a negative outcome", + "image": "" + }, + { + "text": "It represents an positive outcome", + "image": "" + }, + { + "text": "It indicates an unknown outcome", + "image": "" + }, + { + "text": "It symbolizes the probability of an event", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: In the context of logistic regression, what does the notation 'p(y|x; θ)' represent?", + "answers": [ + { + "text": "The probability of 
observing feature 'x' given the parameters 'θ'.", + "image": "" + }, + { + "text": "The probability of the parameters 'θ' given the label 'y' and the feature 'x'.", + "image": "" + }, + { + "text": "The probability of label 'y' given feature 'x' and the parameters 'θ' .", + "image": "" + }, + { + "text": "The likelihood of feature 'x' being present", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: Based on the notes, what is the primary purpose of the sigmoid function, denoted as 'g(z)'?", + "answers": [ + { + "text": "To directly predict the class label.", + "image": "" + }, + { + "text": "To map the output of a linear combination of features to a probability between 0 and 1.", + "image": "" + }, + { + "text": "To calculate the error in the classification.", + "image": "" + }, + { + "text": "To optimize the parameters 'θ'", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: According to the notes, how is the decision boundary determined in the context of binary classification using a linear model?", + "answers": [ + { + "text": "By maximizing the probability p(y|x;θ)", + "image": "" + }, + { + "text": "By setting the sigmoid function g(z) to 0", + "image": "" + }, + { + "text": "By finding the line where g(z) = 0.5 which occurs when θ₀ + θ₁x₁ + θ₂x₂ = 0", + "image": "" + }, + { + "text": "By minimizing the cost function J(θ)", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: In the provided material, what does the notation 'J(θ)' represent?", + "answers": [ + { + "text": "The probability of observing the features given the parameters", + "image": "" + }, + { + "text": "The cost function used to evaluate the model's performance", + "image": "" + }, + { + "text": "The gradient of the model's parameters", + "image": "" + }, + { + "text": "The model's prediction", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: What is the primary goal of the optimization process with respect to J(θ)?", + "answers": [ + { + "text": "To maximize J(θ)", + "image": "" + }, + { + "text": "To calculate the Hessian matrix", + "image": "" + }, + { + "text": "To minimize J(θ)", + "image": "" + }, + { + "text": "To find the gradient of J(θ)", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: According to the notes, what is the significance of the term 'yi' in the cost function J(θ)?", + "answers": [ + { + "text": "It represents the predicted label", + "image": "" + }, + { + "text": "It is a feature value", + "image": "" + }, + { + "text": "It is the learning rate", + "image": "" + }, + { + "text": "It represents the true label", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "BinaryClass: Which of the following best describes the update rule for θ using gradient descent, as per the source?", + "answers": [ + { + "text": "θ = θ + α∇J(θ)", + "image": "" + }, + { + "text": "θ = θ - α∇J(θ)", + "image": "" + }, + { + "text": "θ = α∇J(θ)", + "image": "" + }, + { + "text": "θ = ∇J(θ)", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: In the context of the provided notes, what is the purpose of the expression (sigmoid(z(i)) − y(i)) in the gradient calculation?", + "answers": [ + { + "text": "To calculate the total number of training examples", + "image": "" + }, + { + "text": "To represent the regularization term", + "image": "" + }, + { + "text": "To measure the difference 
between the predicted probability and the true label", + "image": "" + }, + { + "text": "To compute the Hessian matrix", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: According to the provided material, what does the Newton-Raphson method aim to accomplish?", + "answers": [ + { + "text": "It uses the gradient to reach an optimum.", + "image": "" + }, + { + "text": "It finds the minimum by directly inverting the Hessian", + "image": "" + }, + { + "text": "It is used to find the roots of a function by updating the parameters using the Hessian matrix", + "image": "" + }, + { + "text": "It is used to calculate the gradient of the cost function", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: What does the term H in the update equation θ := θ − H⁻¹∇J(θ) refer to (Newton method)?", + "answers": [ + { + "text": "The gradient of the cost function", + "image": "" + }, + { + "text": "The learning rate", + "image": "" + }, + { + "text": "The Hessian matrix of the cost function", + "image": "" + }, + { + "text": "The sigmoid function", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: What is a key drawback mentioned in the notes regarding the Newton-Raphson method?", + "answers": [ + { + "text": "It converges very slowly.", + "image": "" + }, + { + "text": "It always finds the global minimum.", + "image": "" + }, + { + "text": "It requires the calculation of the inverse of the Hessian matrix, which is expensive to compute", + "image": "" + }, + { + "text": "It only works for linear models.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass: According to the material, what algorithm is suggested as a practical alternative to Newton's method?", + "answers": [ + { + "text": "Stochastic gradient descent.", + "image": "" + }, + { + "text": "BFGS (Broyden-Fletcher-Goldfarb-Shanno algorithm)", + "image": "" + }, + { + "text": "Linear Regression.", + "image": "" + }, + { + "text": "The conjugate gradient method.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass: What is the relationship between the gradient and the direction of steepest ascent of a function J(θ)?", + "answers": [ + { + "text": "The gradient points in the direction of the steepest decrease", + "image": "" + }, + { + "text": "The gradient points in the direction of the steepest increase", + "image": "" + }, + { + "text": "The gradient is orthogonal to the direction of the steepest ascent", + "image": "" + }, + { + "text": "The gradient provides no information about the direction of the steepest ascent", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: According to the source, what does \"Log Loss\" or \"Negative Log Likelihood\" (NLL) measure in the context of classification?", + "answers": [ + { + "text": "The accuracy of the model’s predictions", + "image": "" + }, + { + "text": "The sum of squared errors.", + "image": "" + }, + { + "text": "The difference between predicted probabilities and true labels", + "image": "" + }, + { + "text": "The margin of separation between classes.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass2: What does the term 'logits' refer to in the document?", + "answers": [ + { + "text": "The output of the Softmax function.", + "image": "" + }, + { + "text": "The predicted probabilities of each class", + "image": 
"" + }, + { + "text": "The raw, unnormalized scores that are input to the Softmax", + "image": "" + }, + { + "text": "The loss calculated during backpropagation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BinaryClass2: According to the source, what is the effect of the exponential function within Softmax?", + "answers": [ + { + "text": "To normalize values between 0 and 1.", + "image": "" + }, + { + "text": "To produce a weighted average of the inputs.", + "image": "" + }, + { + "text": "To ensure that all scores are positive", + "image": "" + }, + { + "text": "To amplify the differences between the raw scores", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "BinaryClass2: In the context of the provided document, what is the significance of the term \"cross-entropy\"?", + "answers": [ + { + "text": "It measures the complexity of the model.", + "image": "" + }, + { + "text": "It measures the average number of bits needed to encode data.", + "image": "" + }, + { + "text": "It measures the difference between probability distributions", + "image": "" + }, + { + "text": "It quantifies the uncertainty of predictions for multiple classes", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "BinaryClass2: What does the diagram with the red, green and blue points with decision boundaries represent?", + "answers": [ + { + "text": "The training of a linear regression model.", + "image": "" + }, + { + "text": "The concept of bias, variance and underfitting and overfitting.", + "image": "" + }, + { + "text": "The function of gradient descent", + "image": "" + }, + { + "text": "A graphical representation of the Softmax function", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: Based on the document, what is the effect of \"overfitting\" on the model?", + "answers": [ + { + "text": "The model generalizes well to new, unseen data.", + "image": "" + }, + { + "text": "The model memorizes the training data instead of learning the underlying patterns", + "image": "" + }, + { + "text": "The model is too simple to capture the complexity of the data.", + "image": "" + }, + { + "text": "The model has a high bias.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: What does 'bias' in the context of model training refer to in the document?", + "answers": [ + { + "text": "The model's tendency to consistently make incorrect assumptions", + "image": "" + }, + { + "text": "The variability in the model’s predictions.", + "image": "" + }, + { + "text": "The amount of training data used.", + "image": "" + }, + { + "text": "The complexity of the model architecture.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "BinaryClass2: According to the document, what does 'variance' in the context of model training refer to?", + "answers": [ + { + "text": "The model's ability to make consistent predictions.", + "image": "" + }, + { + "text": "The model’s sensitivity to changes in the training data.", + "image": "" + }, + { + "text": "The model's tendency to make consistent errors.", + "image": "" + }, + { + "text": "The bias in the model's assumptions.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: What does the source suggest about the relationship between model complexity and bias and variance?", + "answers": [ + { + "text": "Increasing model complexity always reduces bias and variance.", + "image": "" + 
}, + { + "text": "Increasing model complexity may reduce bias but increase variance.", + "image": "" + }, + { + "text": "Decreasing model complexity always reduces both bias and variance.", + "image": "" + }, + { + "text": "Bias and variance are not affected by the model complexity.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BinaryClass2: What do the dashed lines in the model representation diagrams indicate?", + "answers": [ + { + "text": "Hyperplanes that separate the classes.", + "image": "" + }, + { + "text": "The decision boundaries of an overfitted model.", + "image": "" + }, + { + "text": "The margin of separation between classes.", + "image": "" + }, + { + "text": "Areas of underfitting.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "BinaryClass2: What does the concept of 'Expected Error' (E(Err)) in the context of the document represent?", + "answers": [ + { + "text": "The bias of the model.", + "image": "" + }, + { + "text": "The variance of the model.", + "image": "" + }, + { + "text": "The sum of Bias and Variance.", + "image": "" + }, + { + "text": "The irreducible error of the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BiasVariance: In the context of bias-variance tradeoff, what does \"high variance\" typically indicate about a machine learning model?", + "answers": [ + { + "text": "The model is too simple and underfits the training data.", + "image": "" + }, + { + "text": "The model is too complex and overfits the training data.", + "image": "" + }, + { + "text": "The model has a strong bias towards a specific class.", + "image": "" + }, + { + "text": "The model has low statistical efficiency.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BiasVariance: What is the primary purpose of regularization in machine learning?", + "answers": [ + { + "text": "To increase the model's complexity and reduce bias.", + "image": "" + }, + { + "text": "To reduce the model's complexity and prevent overfitting.", + "image": "" + }, + { + "text": "To improve the model's performance on the training data.", + "image": "" + }, + { + "text": "To increase the model's variance.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BiasVariance: Why is choosing hyperparameters based solely on the training data considered a bad practice?", + "answers": [ + { + "text": "It can lead to a decrease in the model's variance.", + "image": "" + }, + { + "text": "It does not provide information about how the algorithm will perform on new, unseen data.", + "image": "" + }, + { + "text": "It will always choose the least complex model.", + "image": "" + }, + { + "text": "It would always lead to choosing the most complex model.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BiasVariance: What is the primary advantage of using k-fold cross-validation, compared to a single hold-out validation set?", + "answers": [ + { + "text": "It is less computationally expensive.", + "image": "" + }, + { + "text": "It is better suited for large datasets.", + "image": "" + }, + { + "text": "It makes better use of small datasets.", + "image": "" + }, + { + "text": "It always chooses the most complex model.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BiasVariance: According to the sources, which of the following represents the correct sequence of steps when using a hold-out cross validation method?", + "answers": [ + { + 
"text": "Train each model on Sdev, choose the model with lowest error on Strain, optionally evaluate on Stest.", + "image": "" + }, + { + "text": "Split S into Strain, Sdev and Stest, train each model on Strain, choose model with lowest error on Sdev, optionally evaluate on Stest.", + "image": "" + }, + { + "text": "Choose hyperparameters that work best on the test data.", + "image": "" + }, + { + "text": "Split the data into train and test, and choose hyperparameters on the test data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "BiasVariance: What does the term \"empirical risk\" refer to in the context of machine learning?", + "answers": [ + { + "text": "The risk associated with the variance of a model.", + "image": "" + }, + { + "text": "The risk associated with the bias of a model.", + "image": "" + }, + { + "text": "The error of the model on the training data.", + "image": "" + }, + { + "text": "The generalization performance of the model on new, unseen data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "BiasVariance: According to the sources, what is the relationship between model complexity and error?", + "answers": [ + { + "text": "As model complexity increases, error always decreases.", + "image": "" + }, + { + "text": "As model complexity decreases, error always decreases.", + "image": "" + }, + { + "text": "There is an optimal level of model complexity that results in the lowest error, typically, increasing complexity will initially decrease error and then will increase it.", + "image": "" + }, + { + "text": "Model complexity does not affect error.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What is a manifold in the context of PCA?", + "answers": [ + { + "text": "A high-dimensional space where data points are randomly scattered.", + "image": "" + }, + { + "text": "A topological space that locally resembles Euclidean space, where data may reside.", + "image": "" + }, + { + "text": "A set of basis vectors used for representing data points.", + "image": "" + }, + { + "text": "A non-linear transformation applied to the data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is a 'chart' in the context of manifolds?", + "answers": [ + { + "text": "A visual representation of data in a scatter plot.", + "image": "" + }, + { + "text": "A function that provides a one-to-one correspondence between open regions of a surface and subsets of Euclidean space.", + "image": "" + }, + { + "text": "A method for reducing the dimensionality of the data.", + "image": "" + }, + { + "text": "A way to visualize the principal components of a dataset.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is the key property of a chart mapping (ϕ)?", + "answers": [ + { + "text": "It must be non-invertible.", + "image": "" + }, + { + "text": "It must be discontinuous.", + "image": "" + }, + { + "text": "It must be smooth and invertible (a diffeomorphism).", + "image": "" + }, + { + "text": "It can be any arbitrary mapping.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: In the context of unsupervised learning, what is the primary goal?", + "answers": [ + { + "text": "To predict labels for input data.", + "image": "" + }, + { + "text": "To uncover meaningful structures or representations within the data.", + "image": "" + }, + { + "text": "To train a model with labeled outputs.", + "image": "" + }, + { + "text": 
"To use a supervised learning approach.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is a basis in the context of vector spaces?", + "answers": [ + { + "text": "A set of random vectors used for representing points.", + "image": "" + }, + { + "text": "A set of linearly independent vectors that can be used to reconstruct any point in the space.", + "image": "" + }, + { + "text": "A single vector that captures the variance of the data.", + "image": "" + }, + { + "text": "A set of vectors that overlap and point in similar directions.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is the implication of mean-centering a dataset before applying PCA?", + "answers": [ + { + "text": "It increases the variance of the data.", + "image": "" + }, + { + "text": "It shifts the data away from the origin.", + "image": "" + }, + { + "text": "It ensures the data is centered at the origin, simplifying calculations.", + "image": "" + }, + { + "text": "It makes the data more noisy.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: Why must basis vectors be linearly independent?", + "answers": [ + { + "text": "To make computations easier.", + "image": "" + }, + { + "text": "To ensure the basis vectors point in similar directions.", + "image": "" + }, + { + "text": "To ensure they span the entire space and can reconstruct any point in the space without overlap or redundancy.", + "image": "" + }, + { + "text": "Linear dependence is required in PCA.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: In a standard basis, what is unique about the weights when representing a data point?", + "answers": [ + { + "text": "The weights must be numerically solved for.", + "image": "" + }, + { + "text": "The weights are zero for every dimension.", + "image": "" + }, + { + "text": "The weights are simply the values of the data point itself.", + "image": "" + }, + { + "text": "They require complex mathematical functions for computation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What is the significance of an orthonormal basis in the context of PCA?", + "answers": [ + { + "text": "It requires more complex calculations to find the weights.", + "image": "" + }, + { + "text": "It makes the representation of a point more complicated.", + "image": "" + }, + { + "text": "It simplifies the calculation of the weight vector; it can be expressed directly in terms of the spanning set and the data itself.", + "image": "" + }, + { + "text": "It provides a non-unique basis for representing the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What is the role of the operation CC^T (where C is the basis matrix) in the context of an orthonormal basis?", + "answers": [ + { + "text": "It is a non-linear transformation.", + "image": "" + }, + { + "text": "It scales the data.", + "image": "" + }, + { + "text": "It acts as a projection matrix, ensuring data is represented by the orthonormal basis.", + "image": "" + }, + { + "text": "It adds noise to the data.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What happens when the number of spanning vectors (K) is less than the dimensionality of the data space (D)?", + "answers": [ + { + "text": "All points can still be perfectly represented.", + "image": "" + }, + { + "text": "The data becomes more accurate.", + "image": "" + }, + { + "text": "Points can 
only be approximated but not perfectly represented in the subspace.", + "image": "" + }, + { + "text": "The spanning vectors become linearly dependent.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "PCA: What does the projection of a data point onto a subspace represent?", + "answers": [ + { + "text": "A random transformation of the original data point.", + "image": "" + }, + { + "text": "The 'dropping' of the data point perpendicularly onto the subspace defined by the basis vectors.", + "image": "" + }, + { + "text": "A transformation that moves the data point away from the subspace.", + "image": "" + }, + { + "text": "An increase in the dimensionality of the data.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is learned in Principal Component Analysis (PCA) besides weights?", + "answers": [ + { + "text": "Only the weights are learned.", + "image": "" + }, + { + "text": "An appropriate basis (principal components) is also learned alongside the weights.", + "image": "" + }, + { + "text": "Non-linear transformations of the input data.", + "image": "" + }, + { + "text": "The eigenvalues of the data matrix are minimized.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What is the relationship between the PCA least squares cost function and the autoencoder?", + "answers": [ + { + "text": "They are unrelated mathematical concepts.", + "image": "" + }, + { + "text": "The simplified PCA least squares cost function under orthogonality constraint is known as the autoencoder.", + "image": "" + }, + { + "text": "The autoencoder is only used for supervised learning.", + "image": "" + }, + { + "text": "The cost function is always minimized by using non-orthogonal matrices.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: What are principal components?", + "answers": [ + { + "text": "Randomly chosen vectors that span the data space.", + "image": "" + }, + { + "text": "The elements of the orthonormal basis that point in the directions of the greatest variance in the dataset.", + "image": "" + }, + { + "text": "The weight vectors used to represent each point.", + "image": "" + }, + { + "text": "The eigenvalues of the data covariance matrix.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "PCA: How are principal components computed?", + "answers": [ + { + "text": "By random selection from the dataset.", + "image": "" + }, + { + "text": "As the eigenvectors of the data's correlation matrix (or covariance matrix).", + "image": "" + }, + { + "text": "By a complex non-linear optimization process.", + "image": "" + }, + { + "text": "Using only the standard basis.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: In the context of decision trees, what is the primary purpose of a 'splitting variable'?", + "answers": [ + { + "text": "To randomly select a subset of the data to be classified.", + "image": "" + }, + { + "text": "To reduce the number of neighbours in the k-NN algorithm", + "image": "" + }, + { + "text": "To divide the feature space into mutually exclusive regions", + "image": "" + }, + { + "text": "To assign weights to all the samples according to their importance.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the role of the 'misclassification rate' in the context of building a decision tree?", + "answers": [ + { + "text": "It helps to reduce the data 
dimensionality.", + "image": "" + }, + { + "text": "It determines the appropriate number of nearest neighbors in a k-NN.", + "image": "" + }, + { + "text": "It quantifies the performance of a given split", + "image": "" + }, + { + "text": "It evaluates the overall performance of the trained model", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the primary distinction between 'Tree-based methods' and 'Linear regression models' as described in the text?", + "answers": [ + { + "text": "Tree-based methods use Euclidean distance, while linear regression does not.", + "image": "" + }, + { + "text": "Linear regression models are more robust to outliers.", + "image": "" + }, + { + "text": "Tree-based methods partition the input space into rectangles whilst linear regression creates a single partitioning", + "image": "" + }, + { + "text": "Linear regression models are more computationally efficient.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: In the context of bagging, what is 'bootstrapping'?", + "answers": [ + { + "text": "The random division of the data into training and test sets.", + "image": "" + }, + { + "text": "The technique used to visualize the decision boundaries in tree-based methods", + "image": "" + }, + { + "text": "A method of randomly sampling with replacement from the original dataset", + "image": "" + }, + { + "text": "A process of feature selection that reduces the complexity of the model.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: How does 'random forest' method build individual trees that are less correlated?", + "answers": [ + { + "text": "By using only a subset of the input samples for the training.", + "image": "" + }, + { + "text": "By pruning the trees to reduce their complexity.", + "image": "" + }, + { + "text": "By randomly choosing a subset of the features at each split", + "image": "" + }, + { + "text": "By weighting the importance of the features.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the fundamental idea behind 'Boosting' as described in the text?", + "answers": [ + { + "text": "To average the predictions of multiple decision trees.", + "image": "" + }, + { + "text": "To make every tree independent from other trees.", + "image": "" + }, + { + "text": "To build an ensemble of models, where each model corrects the errors of its predecessor", + "image": "" + }, + { + "text": "To select the best performing features among all available ones.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: In the context of boosting, how are the weights of the training samples adjusted after each boosting step?", + "answers": [ + { + "text": "Weights are assigned to the samples based on their Euclidean distance from the decision boundary.", + "image": "" + }, + { + "text": "The weights are randomly re-distributed to ensure variety in the training data.", + "image": "" + }, + { + "text": "Weights are adjusted to increase the importance of misclassified samples", + "image": "" + }, + { + "text": "The weights of the training samples remain unchanged throughout the boosting process.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the objective function that is being optimized when fitting a single tree in a boosting model?", + "answers": [ + { + "text": "The misclassification rate.", + "image": "" + }, + { + 
"text": "A sum of weights of the misclassified examples", + "image": "" + }, + { + "text": "The entropy", + "image": "" + }, + { + "text": "The variance of the labels.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: In the context of gradient boosting, what does the ‘gradient’ refer to?", + "answers": [ + { + "text": "The direction of maximum increase of the loss function", + "image": "" + }, + { + "text": "The set of all training samples.", + "image": "" + }, + { + "text": "The change in the feature space", + "image": "" + }, + { + "text": "The number of nodes in the decision tree.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "Clustering: Which of the following best describes the core idea behind the k-Nearest Neighbour (k-NN) algorithm as presented in the source?", + "answers": [ + { + "text": "It partitions the feature space into rectangles.", + "image": "" + }, + { + "text": "It determines class membership by identifying the k-nearest data points to a given instance.", + "image": "" + }, + { + "text": "It applies boosting techniques to improve accuracy.", + "image": "" + }, + { + "text": "It uses a linear combination of basis functions.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: In the context of tree-based methods, which of the following is NOT a typical criterion for splitting nodes?", + "answers": [ + { + "text": "Maximizing information gain.", + "image": "" + }, + { + "text": "Minimising impurity.", + "image": "" + }, + { + "text": "Maximising the number of features.", + "image": "" + }, + { + "text": "Minimizing the weighted average impurity of child nodes", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the purpose of the 'margin' in the context of support vector machines as described in the source?", + "answers": [ + { + "text": "To ensure each data point is correctly classified.", + "image": "" + }, + { + "text": "To find a decision boundary that minimizes the number of misclassifications.", + "image": "" + }, + { + "text": "To minimize the computational complexity.", + "image": "" + }, + { + "text": "To maximize the separation between classes.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Clustering: The source mentions that the decision rule for k-NN is updated based on which aspect of the k neighbours?", + "answers": [ + { + "text": "Their distances to the decision boundary.", + "image": "" + }, + { + "text": "Their feature values.", + "image": "" + }, + { + "text": "Their class labels.", + "image": "" + }, + { + "text": "Their position in feature space.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: What is the primary focus of 'Boosting' algorithms, according to the source?", + "answers": [ + { + "text": "To independently fit many decision trees.", + "image": "" + }, + { + "text": "To iteratively fit weak learners while focusing on misclassified instances from previous iterations.", + "image": "" + }, + { + "text": "To linearly separate data into different classes.", + "image": "" + }, + { + "text": "To find the optimal decision boundary using a kernel trick.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: In the context of boosting, what is meant by “reweighting” training data?", + "answers": [ + { + "text": "It’s where the values of features are adjusted.", + "image": "" + }, + { + "text": "It is the 
process of re-assigning training samples to different classes.", + "image": "" + }, + { + "text": "It's adjusting the weights of the linear function.", + "image": "" + }, + { + "text": "It means increasing the weight of instances that are harder to classify.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Clustering: According to the source, what is a 'weak learner' in the context of boosting algorithms?", + "answers": [ + { + "text": "A model that achieves very low training error.", + "image": "" + }, + { + "text": "A model that performs slightly better than random guessing.", + "image": "" + }, + { + "text": "A complex model with high capacity.", + "image": "" + }, + { + "text": "A model that is prone to overfitting.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "Clustering: What is the main objective of the 'objective function' mentioned in the section about boosting?", + "answers": [ + { + "text": "To minimize the number of training samples.", + "image": "" + }, + { + "text": "To maximize the margin between classes.", + "image": "" + }, + { + "text": "To minimize the empirical risk (loss) based on the training data.", + "image": "" + }, + { + "text": "To maximize the number of iterations.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Clustering: According to the source, what is the rationale behind 'regularization' when building the objective function in boosting?", + "answers": [ + { + "text": "To speed up training time.", + "image": "" + }, + { + "text": "To simplify the data.", + "image": "" + }, + { + "text": "To avoid overfitting.", + "image": "" + }, + { + "text": "To convert linear to non-linear problems.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "An image is defined as:", + "answers": [ + { + "text": "A collection of coloured dots.", + "image": "" + }, + { + "text": "A two-dimensional array of numerical values.", + "image": "" + }, + { + "text": "A function that maps pixel locations to intensity values.", + "image": "" + }, + { + "text": "A visual representation of objects.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "How many colour combinations are possible for a single pixel in an RGB image, where each colour channel (Red, Green, Blue) has values ranging from 0 to 255?", + "answers": [ + { + "text": "256", + "image": "" + }, + { + "text": "65,536", + "image": "" + }, + { + "text": "16,777,216", + "image": "" + }, + { + "text": "1,048,576", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "What is the primary function of image filtering?", + "answers": [ + { + "text": "To change the colour palette of an image.", + "image": "" + }, + { + "text": "To alter the pixel locations within an image.", + "image": "" + }, + { + "text": "To apply a function to the pixels of an image, without changing their positions.", + "image": "" + }, + { + "text": "To compress the size of an image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Which of the following is NOT a typical application of image filtering?", + "answers": [ + { + "text": "Image deblurring.", + "image": "" + }, + { + "text": "Improving contrast.", + "image": "" + }, + { + "text": "Noise reduction.", + "image": "" + }, + { + "text": "Increasing image resolution.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "What is the mathematical operation at the core of 2D convolutions?", + "answers": [ + { + "text":
"Subtraction and division.", + "image": "" + }, + { + "text": "Element-wise matrix multiplication and summation.", + "image": "" + }, + { + "text": "Vector dot product.", + "image": "" + }, + { + "text": "Matrix inversion.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "If the kernel was not flipped in a 2D convolution, the operation would be a:", + "answers": [ + { + "text": "Matrix transpose.", + "image": "" + }, + { + "text": "Cross-correlation.", + "image": "" + }, + { + "text": "Dot product.", + "image": "" + }, + { + "text": "Linear transformation.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "What is the purpose of padding in 2D convolutions?", + "answers": [ + { + "text": "To reduce the computational cost of the convolution.", + "image": "" + }, + { + "text": "To maintain the same output dimensions as the input.", + "image": "" + }, + { + "text": "To sharpen the image.", + "image": "" + }, + { + "text": "To blur the image.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "An identity kernel in image filtering will:", + "answers": [ + { + "text": "Sharpen the edges of an image.", + "image": "" + }, + { + "text": "Leave the image unchanged.", + "image": "" + }, + { + "text": "Blur the image significantly.", + "image": "" + }, + { + "text": "Invert the colours of the image.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "A mean blur kernel works by:", + "answers": [ + { + "text": "Multiplying the pixel value by a constant.", + "image": "" + }, + { + "text": "Amplifying the difference between a pixel and its neighbors.", + "image": "" + }, + { + "text": "Averaging a pixel with its surrounding neighbours.", + "image": "" + }, + { + "text": "Giving greater weight to the centre pixel.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "Which filter is known for weighting nearby pixels more heavily than distant ones, leading to a more natural-looking blur?", + "answers": [ + { + "text": "Mean blur filter.", + "image": "" + }, + { + "text": "Sharpening kernel.", + "image": "" + }, + { + "text": "Identity kernel.", + "image": "" + }, + { + "text": "Gaussian blur filter.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "Which of the following is a property of a Gaussian filter?", + "answers": [ + { + "text": "Non-rotational symmetry.", + "image": "" + }, + { + "text": "It weights distant pixels more than nearby ones.", + "image": "" + }, + { + "text": "Rotational symmetry.", + "image": "" + }, + { + "text": "It enhances the noise in an image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "What is the separable property of a filter?", + "answers": [ + { + "text": "Applying the filter only to certain parts of the image.", + "image": "" + }, + { + "text": "First convolving rows with a 1D filter, then columns with a 1D filter.", + "image": "" + }, + { + "text": "Applying different filters to different image channels.", + "image": "" + }, + { + "text": "Convolution with multiple kernels", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "In the context of a Gaussian pyramid, what is the first step after starting with the original image?", + "answers": [ + { + "text": "Downsampling the image.", + "image": "" + }, + { + "text": "Applying a Gaussian blur.", + "image": "" + }, + { + "text": "Upsampling the image.", + "image": "" + }, + { + "text": "Applying a sharpening filter.", + "image": 
"" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "How does downsampling change the image size in each level of a Gaussian pyramid?", + "answers": [ + { + "text": "Reduces the size by a factor of 2.", + "image": "" + }, + { + "text": "Reduces the size by a factor of 3.", + "image": "" + }, + { + "text": "Reduces the size by a factor of 4.", + "image": "" + }, + { + "text": "Reduces the size by a factor of 8.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "What is the primary reason for applying a Gaussian blur before downsampling in a Gaussian pyramid?", + "answers": [ + { + "text": "To increase the resolution of the image.", + "image": "" + }, + { + "text": "To sharpen the image before resizing.", + "image": "" + }, + { + "text": "To act as a low-pass filter and prevent aliasing.", + "image": "" + }, + { + "text": "To make the image more colourful.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "What is aliasing, as discussed in the context of image downsampling?", + "answers": [ + { + "text": "The effect of making the image sharper.", + "image": "" + }, + { + "text": "Distortions in the downsampled image caused by undersampling high-frequency components.", + "image": "" + }, + { + "text": "The effect of applying a blur.", + "image": "" + }, + { + "text": "A form of image compression.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: Which of the following best describes the fundamental concept of computer vision, as presented in the material?", + "answers": [ + { + "text": "Generating arrays of numbers that resemble real-world objects, like fruits.", + "image": "" + }, + { + "text": "Solving the 'inverse graphics' problem by inferring the structure of the world from visual cues.", + "image": "" + }, + { + "text": "Creating digital images using a pinhole camera model and digitizers.", + "image": "" + }, + { + "text": "Recognising objects by matching 2D image fragments and their configurations.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: The 'trompe l’oeil' examples in the text primarily illustrate which aspect of computer vision?", + "answers": [ + { + "text": "The challenges of object recognition in cluttered scenes.", + "image": "" + }, + { + "text": "The use of color and shading to create realistic images.", + "image": "" + }, + { + "text": "The exploitation of depth-perception cues and their mathematical modeling.", + "image": "" + }, + { + "text": "The importance of prior expectations in image interpretation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the provided text, what is a key characteristic of 'basic level categories' in object recognition?", + "answers": [ + { + "text": "They represent the most detailed classification of objects.", + "image": "" + }, + { + "text": "They are the categories that are most difficult for humans to identify quickly.", + "image": "" + }, + { + "text": "They are culturally dependent without any consistency.", + "image": "" + }, + { + "text": "They represent the highest level at which category members have similar perceived shapes and are easily recognized by humans.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "CV Basics: What is the primary purpose of image filtering, as described in the material?", + "answers": [ + { + "text": "To increase the amount of noise in an image to make edges more apparent.", + "image": "" + 
}, + { + "text": "To create 3D models of objects from 2D images.", + "image": "" + }, + { + "text": "To enhance image quality, extract features, and reduce noise.", + "image": "" + }, + { + "text": "To generate new images using the principles of graphics.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: In the context of 2D convolution, which of the following steps is essential?", + "answers": [ + { + "text": "Rotating the filter kernel by 90 degrees.", + "image": "" + }, + { + "text": "Mirroring the filter kernel before applying it to the image.", + "image": "" + }, + { + "text": "Applying a non-linear function to the local image patch.", + "image": "" + }, + { + "text": "Only summing the values without multiplication.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: What does the text say about linear systems in the context of image processing?", + "answers": [ + { + "text": "They are used only for non-linear filtering.", + "image": "" + }, + { + "text": "They are characterized by a lack of superposition.", + "image": "" + }, + { + "text": "They exhibit properties such as homogeneity, additivity, and superposition.", + "image": "" + }, + { + "text": "They cannot be represented by matrix operations.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the source material, why is Gaussian averaging preferred over a simple box filter for smoothing?", + "answers": [ + { + "text": "Because box filters are computationally more expensive.", + "image": "" + }, + { + "text": "Because box filters do not reduce noise effectively.", + "image": "" + }, + { + "text": "Because Gaussian averaging gives more weight to nearby pixels, modelling probabilistic inference.", + "image": "" + }, + { + "text": "Because box filters are not separable and therefore cannot be implemented efficiently.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the main problem caused by subsampling without average filtering, according to the text?", + "answers": [ + { + "text": "It makes the image smoother.", + "image": "" + }, + { + "text": "It increases the resolution of the image.", + "image": "" + }, + { + "text": "It leads to aliasing, introducing artifacts in the image.", + "image": "" + }, + { + "text": "It preserves high-frequency information more accurately.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the significance of the 'derivative of Gaussian' in edge detection, according to the source?", + "answers": [ + { + "text": "It is used to enhance noise and amplify variations in the image.", + "image": "" + }, + { + "text": "It directly extracts lines and edges without the need for smoothing.", + "image": "" + }, + { + "text": "It is an approximation of the optimal edge detector under certain assumptions (linear filtering and additive Gaussian noise).", + "image": "" + }, + { + "text": "It is a simplified method used to avoid complex calculations in edge detection.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: In the context of edge detection, what is the role of \"non-maximum suppression\"?", + "answers": [ + { + "text": "To amplify the noise near edges.", + "image": "" + }, + { + "text": "To smooth out the detected edges.", + "image": "" + }, + { + "text": "To thin edges by choosing the largest gradient magnitude along the gradient direction.", + "image": "" + }, 
+ { + "text": "To detect edges at different scales and combine them into a single map.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the Laplacian operator, as presented in the text, and what is it used for?", + "answers": [ + { + "text": "It is a filter that calculates the gradient magnitude of an image.", + "image": "" + }, + { + "text": "It is a smoothing filter that reduces high-frequency information.", + "image": "" + }, + { + "text": "It is a linear filter used to detect edges by identifying zero-crossings of the second derivative.", + "image": "" + }, + { + "text": "It is a filter that is used for color histogram generation.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is a primary motivation for using color histograms for object recognition?", + "answers": [ + { + "text": "They are sensitive to geometric transformations.", + "image": "" + }, + { + "text": "They require perfect segmentation of objects.", + "image": "" + }, + { + "text": "They are computationally expensive, however, this is offset by the quality of recognition they provide.", + "image": "" + }, + { + "text": "They are relatively invariant to object translations, image rotations, and partial occlusions.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "CV Basics: According to the source, what is a limitation of using color histograms for object recognition?", + "answers": [ + { + "text": "They cannot be used for deformable objects such as pullovers.", + "image": "" + }, + { + "text": "They require a large number of training views per object.", + "image": "" + }, + { + "text": "They can be sensitive to changes in illumination conditions.", + "image": "" + }, + { + "text": "They perform poorly when objects are partially occluded.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: Which of the following statements accurately describes the 'Intersection' method for comparing histograms?", + "answers": [ + { + "text": "It calculates the differences between corresponding histogram cells.", + "image": "" + }, + { + "text": "It gives a higher score when there is minimal overlap between histograms.", + "image": "" + }, + { + "text": "It measures the common part of both histograms, with a range between 0 and 1.", + "image": "" + }, + { + "text": "It weights all histogram cells equally regardless of their significance.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: In the context of performance evaluation, what does a confusion matrix help to determine?", + "answers": [ + { + "text": "The optimal parameters for an image filtering algorithm.", + "image": "" + }, + { + "text": "The best method for comparing color histograms.", + "image": "" + }, + { + "text": "The number of true positives, true negatives, false positives, and false negatives for a given classifier and threshold.", + "image": "" + }, + { + "text": "The area under the ROC curve for a specific model.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What does the term \"recall\" measure in the context of performance evaluation?", + "answers": [ + { + "text": "The proportion of correctly identified negative cases.", + "image": "" + }, + { + "text": "The proportion of actual positives that are correctly identified.", + "image": "" + }, + { + "text": "The overall accuracy of the classification model.", + "image": "" + }, + { + "text": "The 
proportion of false alarms in the classification process.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: In the context of ROC curves, what does the True Positive Rate (TPR) represent?", + "answers": [ + { + "text": "The rate of false alarms for a given threshold.", + "image": "" + }, + { + "text": "The proportion of correctly identified negative cases.", + "image": "" + }, + { + "text": "The proportion of actual positives that are correctly identified.", + "image": "" + }, + { + "text": "The overall accuracy of the classification model.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the significance of the Area Under the ROC Curve (AUROC)?", + "answers": [ + { + "text": "It represents the trade-off between precision and recall for a classification model.", + "image": "" + }, + { + "text": "It indicates how well a classifier distinguishes between two classes, with a higher AUROC suggesting better performance.", + "image": "" + }, + { + "text": "It helps choose the best comparison method for color histograms.", + "image": "" + }, + { + "text": "It is used to determine the optimal threshold for object detection algorithms.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: According to the material, why is the precision-recall curve preferred for detection tasks?", + "answers": [ + { + "text": "Because it does not require any threshold.", + "image": "" + }, + { + "text": "Because it is less sensitive to noise than other performance metrics.", + "image": "" + }, + { + "text": "Because it is better suited when the number of true negatives is not well-defined, such as in detection tasks.", + "image": "" + }, + { + "text": "Because it gives more importance to the true negative rate.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: Leonardo da Vinci's observations about the camera obscura, as described in the text, highlight which fundamental principle of image formation?", + "answers": [ + { + "text": "The principle of digital image processing.", + "image": "" + }, + { + "text": "The formation of a reversed and reduced image through a small aperture.", + "image": "" + }, + { + "text": "The concept of linear filtering in image enhancement.", + "image": "" + }, + { + "text": "The use of color histograms for object recognition.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: The text refers to computer vision as the problem of 'inverse graphics'. 
What does this imply about the goals of computer vision?", + "answers": [ + { + "text": "Computer vision aims to generate images that are indistinguishable from real-world scenes.", + "image": "" + }, + { + "text": "Computer vision seeks to create digital images by using the pinhole camera model.", + "image": "" + }, + { + "text": "Computer vision tries to infer the properties of the world from images, reversing the process of graphics which creates images from the world.", + "image": "" + }, + { + "text": "Computer vision focuses on the analysis of color histograms for object identification.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the text, what is the significance of the 'pictorial structure' model in object recognition?", + "answers": [ + { + "text": "It only uses 3D models for object recognition.", + "image": "" + }, + { + "text": "It relies on color histograms to identify objects.", + "image": "" + }, + { + "text": "It represents objects as combinations of 2D image fragments and their configurations.", + "image": "" + }, + { + "text": "It is a simple method that can overcome all complexities of object recognition.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What does the material say about the challenges of visual categorization?", + "answers": [ + { + "text": "They are limited to problems with occlusions.", + "image": "" + }, + { + "text": "They are not affected by multi-scale, multi-view variations.", + "image": "" + }, + { + "text": "They include issues such as multi-scale, multi-view, multi-class, varying illumination, occlusion, cluttered backgrounds, articulation, and high intraclass variance/low interclass variance.", + "image": "" + }, + { + "text": "They are easily solved by basic linear filtering techniques.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the role of a 'filter kernel' in the context of image filtering?", + "answers": [ + { + "text": "It is used to digitize analog images.", + "image": "" + }, + { + "text": "It is only useful for non-linear operations.", + "image": "" + }, + { + "text": "It is a small matrix that is used to apply some function to a local image patch during convolution.", + "image": "" + }, + { + "text": "It represents the output image, after applying the convolution.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the source material, what is the primary goal of using linear filtering for smoothing an image?", + "answers": [ + { + "text": "To enhance the edges and details in an image.", + "image": "" + }, + { + "text": "To create a sharper version of the image.", + "image": "" + }, + { + "text": "To reduce noise and fill in missing information.", + "image": "" + }, + { + "text": "To perform non-linear operations on an image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: Why is the concept of 'separability' important in the context of Gaussian filtering?", + "answers": [ + { + "text": "Because it makes the filter non-linear.", + "image": "" + }, + { + "text": "Because it allows for efficient implementation of the filtering operation by applying 1D filters sequentially.", + "image": "" + }, + { + "text": "Because it increases the smoothing effect on an image.", + "image": "" + }, + { + "text": "Because it reduces the computational cost of applying a box filter.", + "image": "" + } + ], + "correct": 1, + 
"image": "" + }, + { + "quest": "CV Basics: What is the main idea behind using a Gaussian pyramid for multi-scale image representation?", + "answers": [ + { + "text": "To reduce the resolution of images for easier processing.", + "image": "" + }, + { + "text": "To apply linear filtering in a single scale.", + "image": "" + }, + { + "text": "To represent an image at different scales by repeated smoothing and subsampling.", + "image": "" + }, + { + "text": "To compute the 2nd derivative of an image.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: In the context of edge detection, why is smoothing an image prior to computing derivatives beneficial?", + "answers": [ + { + "text": "It enhances the noise, to see edges more clearly.", + "image": "" + }, + { + "text": "It ensures edges are not affected by lighting changes.", + "image": "" + }, + { + "text": "It reduces the impact of noise and small variations, which can interfere with detecting true edges.", + "image": "" + }, + { + "text": "It makes the edges thicker and more visible.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the text, what does the magnitude of the gradient measure in edge detection?", + "answers": [ + { + "text": "The direction of the edge.", + "image": "" + }, + { + "text": "The noise level around an edge.", + "image": "" + }, + { + "text": "The strength of an edge.", + "image": "" + }, + { + "text": "The scale of the image where edges are more evident.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the main advantage of using the Canny edge detector over other edge detection methods, according to the text?", + "answers": [ + { + "text": "It is faster and less computationally intensive than other methods.", + "image": "" + }, + { + "text": "It is an approximation of the optimal edge detector under the assumptions of linear filtering and additive Gaussian noise, offering a good trade-off between detection and localization.", + "image": "" + }, + { + "text": "It does not require any parameter tuning for different images.", + "image": "" + }, + { + "text": "It is simpler to implement and more robust in noisy conditions.", + "image": "" + } + ], + "correct": 1, + "image": "" + }, + { + "quest": "CV Basics: In the context of edge detection using the Laplacian, what are 'zero-crossings' and what do they indicate?", + "answers": [ + { + "text": "They indicate the location of the maximum gradient value.", + "image": "" + }, + { + "text": "They indicate the strength of an edge in an image.", + "image": "" + }, + { + "text": "They are used to calculate color histograms in an image.", + "image": "" + }, + { + "text": "They represent points where the second derivative changes sign, which indicates the location of edges.", + "image": "" + } + ], + "correct": 3, + "image": "" + }, + { + "quest": "CV Basics: According to the text, what is a key characteristic of appearance-based object identification/recognition?", + "answers": [ + { + "text": "It relies on explicit 3D models of objects.", + "image": "" + }, + { + "text": "It requires perfect segmentation of the object in the image.", + "image": "" + }, + { + "text": "It represents objects by a collection of 2D images without the need for a 3D model, and it is sufficient to compare the 2D appearances.", + "image": "" + }, + { + "text": "It is invariant to changes in the viewing angle.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + 
"quest": "CV Basics: What does the material say about the use of color in object recognition?", + "answers": [ + { + "text": "Color changes under geometric transformations and therefore is not a reliable feature.", + "image": "" + }, + { + "text": "Color is a global feature that is robust to occlusions.", + "image": "" + }, + { + "text": "Color is a local feature that remains relatively constant under geometric transformations and is robust to partial occlusions.", + "image": "" + }, + { + "text": "Color cannot be used for recognition because it is very sensitive to light variations.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What does a 3D (joint) color histogram represent?", + "answers": [ + { + "text": "The 1D count of pixels of individual R, G, B colors, and luminance.", + "image": "" + }, + { + "text": "The color normalized by intensity.", + "image": "" + }, + { + "text": "The count of pixels for each combination of RGB values.", + "image": "" + }, + { + "text": "A 2D representation of color, for example, using two parameters, r and g.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: According to the text, what is the significance of using a 'chromatic representation' of color?", + "answers": [ + { + "text": "It ensures that the color histograms do not change under rotation.", + "image": "" + }, + { + "text": "It guarantees that the color histogram is robust to occlusion.", + "image": "" + }, + { + "text": "It normalizes colors by intensity, focusing on the color itself rather than its brightness.", + "image": "" + }, + { + "text": "It generates an intensity image that is later used to extract color information.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What does the Euclidean distance measure in the context of histogram comparison?", + "answers": [ + { + "text": "It measures the differences between the histograms, weighting each cell equally.", + "image": "" + }, + { + "text": "It measures the common part of both histograms.", + "image": "" + }, + { + "text": "It measures if two distributions are statistically different, with a focus on outliers.", + "image": "" + }, + { + "text": "It only measures the distance between the central cells of two histograms.", + "image": "" + } + ], + "correct": 0, + "image": "" + }, + { + "quest": "CV Basics: In the context of histogram comparison, what does the Chi-square measure primarily aim to test?", + "answers": [ + { + "text": "The overlap between the histograms of known objects and a test image.", + "image": "" + }, + { + "text": "The distances between the centers of two histograms.", + "image": "" + }, + { + "text": "Whether two distributions are statistically different.", + "image": "" + }, + { + "text": "If the two images can be considered the same object.", + "image": "" + } + ], + "correct": 2, + "image": "" + }, + { + "quest": "CV Basics: What is the 'nearest-neighbor' strategy for object recognition using histograms, as described in the text?", + "answers": [ + { + "text": "It measures the distance between objects using the Euclidean distance.", + "image": "" + }, + { + "text": "It focuses on the differences between histograms using a Chi-squared measure.", + "image": "" + }, + { + "text": "It looks for the perfect overlap of two histograms.", + "image": "" + }, + { + "text": "It compares a test histogram to a set of known object histograms and selects the one with the best matching score.", + "image": "" + } + 
], + "correct": 3, + "image": "" + }, + { + "quest": "CV Basics: According to the material, what is the 'color constancy problem' that affects color histograms?", + "answers": [ + { + "text": "It refers to the fact that colors cannot be used in image recognition.", + "image": "" + }, + { + "text": "It describes a scenario where objects have the same color distribution.", + "image": "" + }, + { + "text": "It is the problem of pixel colors changing due to the illumination conditions.", + "image": "" + }, + { + "text": "It is a problem of color histograms that arises because not all objects can be identified by their color distribution.", + "image": "" + } + ], + "correct": 2, + "image": "" + } +] \ No newline at end of file