diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4c7db2f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +Dockerfile +.dockerignore +node_modules +npm-debug.log +README.md +.next +docker +.git \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..df543d2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,55 @@ +# Source: https://github.com/vercel/next.js/blob/canary/examples/with-docker-multi-env/docker/production/Dockerfile +# syntax=docker.io/docker/dockerfile:1 + +FROM node:18-alpine AS base + +# 1. Install dependencies only when needed +FROM base AS deps +# Check https://github.com/nodejs/docker-node/tree/b4117f9333da4138b03a546ec926ef50a31506c3#nodealpine to understand why libc6-compat might be needed. +RUN apk add --no-cache libc6-compat + +WORKDIR /app + +# Install dependencies based on the preferred package manager +COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* .npmrc* ./ +RUN \ + if [ -f yarn.lock ]; then yarn --frozen-lockfile; \ + elif [ -f package-lock.json ]; then npm ci; \ + elif [ -f pnpm-lock.yaml ]; then corepack enable pnpm && pnpm i; \ + else echo "Lockfile not found." && exit 1; \ + fi + + +# 2. Rebuild the source code only when needed +FROM base AS builder +WORKDIR /app +COPY --from=deps /app/node_modules ./node_modules +COPY . . +# This will do the trick, use the corresponding env file for each environment. +# COPY .env.production.sample .env.production +RUN npm run build + +# 3. 
Production image, copy all the files and run next +FROM base AS runner +WORKDIR /app + +ENV NODE_ENV=production + +RUN addgroup -g 1001 -S nodejs +RUN adduser -S nextjs -u 1001 + +COPY --from=builder /app/public ./public + +# Automatically leverage output traces to reduce image size +# https://nextjs.org/docs/advanced-features/output-file-tracing +COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./ +COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static + + +USER nextjs + +EXPOSE 3000 + +ENV PORT=3000 + +CMD HOSTNAME="0.0.0.0" node server.js \ No newline at end of file diff --git a/pages/CSE559A/CSE559A_L11.md b/pages/CSE559A/CSE559A_L11.md new file mode 100644 index 0000000..0fb03b3 --- /dev/null +++ b/pages/CSE559A/CSE559A_L11.md @@ -0,0 +1,141 @@ +# CSE559A Lecture 11 + +## Continue on Architecture of CNNs + +### AlexNet (2012-2013) + +Successor of LeNet-5, but with a few significant changes + +- Max pooling, ReLU nonlinearity +- Dropout regularization +- More data and bigger model (7 hidden layers, 650K units, 60M params) +- GPU implementation (50x speedup over CPU) + - Trained on two GPUs for a week + +#### Architecture for AlexNet + +- Input: 224x224x3 +- 11x11 conv, stride 4, 96 filters +- 3x3 max pooling, stride 2 +- 5x5 conv, 256 filters, padding 2 +- 3x3 max pooling, stride 2 +- 3x3 conv, 384 filters, padding 1 +- 3x3 conv, 384 filters, padding 1 +- 3x3 conv, 256 filters, padding 1 +- 3x3 max pooling, stride 2 +- 4096-unit FC, ReLU +- 4096-unit FC, ReLU +- 1000-unit FC, softmax + +#### Key points for AlexNet + +Most floating point operations occur in the convolutional layers. + +Most of the memory usage is in the early convolutional layers. + +Nearly all parameters are in the fully-connected layers. + +#### Further refinement (ZFNet, 2013) + +Best paper award at ILSVRC 2013. + +Nicely visualizes the feature maps. + +### VGGNet (2014) + +All the conv layers are 3x3 filters with stride 1 and padding 1.
Take advantage of pooling to reduce the spatial dimensionality. + +#### Architecture for VGGNet + +- Input: 224x224x3 +- 3x3 conv, 64 filters, padding 1 +- 3x3 conv, 64 filters, padding 1 +- 2x2 max pooling, stride 2 +- 3x3 conv, 128 filters, padding 1 +- 3x3 conv, 128 filters, padding 1 +- 2x2 max pooling, stride 2 +- 3x3 conv, 256 filters, padding 1 +- 3x3 conv, 256 filters, padding 1 +- 2x2 max pooling, stride 2 +- 3x3 conv, 512 filters, padding 1 +- 3x3 conv, 512 filters, padding 1 +- 3x3 conv, 512 filters, padding 1 +- 2x2 max pooling, stride 2 +- 3x3 conv, 512 filters, padding 1 +- 3x3 conv, 512 filters, padding 1 +- 3x3 conv, 512 filters, padding 1 +- 2x2 max pooling, stride 2 +- 4096-unit FC, ReLU +- 4096-unit FC, ReLU +- 1000-unit FC, softmax + +#### Key points for VGGNet + +- Sequence of deeper networks trained progressively +- Large receptive fields replaced by successive layers of 3x3 convs with ReLU in between + - A 7x7 conv takes $49K^2$ parameters, while three stacked 3x3 convs take $27K^2$ parameters + +#### Pretrained models + +- Use a pretrained network as feature extractor (removing the last layer and training a new linear layer) (transfer learning) + - Add RNN layers to generate captions +- Fine-tune the model for the new task (finetuning) + - Keep the earlier layers fixed and only train the new prediction layer + +### GoogLeNet (2014) + +Stem network at the start aggressively downsamples input.
+

#### Key points for GoogLeNet

- Parallel paths with different receptive field sizes and operations are means to capture sparse patterns of correlations in the stack of feature maps
- Use 1x1 convs to reduce dimensionality
- Use Global Average Pooling (GAP) to replace the fully connected layer
- Auxiliary classifiers to improve training
  - Training using loss at the end of the network didn't work well: network is too deep, gradients don't provide useful model updates
  - As a hack, attach "auxiliary classifiers" at several intermediate points in the network that also try to classify the image and receive loss
  - _GoogLeNet predates batch normalization; with batch normalization, the auxiliary classifiers were removed._

### ResNet (2015)

152 layers

[ResNet paper](https://arxiv.org/abs/1512.03385)

#### Key points for ResNet

- The residual module
  - Introduce `skip` or `shortcut` connections to avoid the degradation problem
  - Make it easy for network layers to represent the identity mapping
- Directly performing 3×3 convolutions with 256 feature maps at input and output:
  - $256 \times 256 \times 3 \times 3 \approx 600K$ operations
  - Using 1×1 convolutions to reduce 256 to 64 feature maps, followed by 3×3 convolutions, followed by 1×1 convolutions to expand back to 256 maps:
  - $256 \times 64 \times 1 \times 1 \approx 16K$
  - $64 \times 64 \times 3 \times 3 \approx 36K$
  - $64 \times 256 \times 1 \times 1 \approx 16K$
  - Total $\approx 70K$

_Possibly the first model with top-5 error rate better than human performance._

### Beyond ResNet (2016 and onward): Wide ResNet, ResNeXT, DenseNet

#### Wide ResNet

Reduce number of residual blocks, but increase number of feature maps in each block

- More parallelizable, better feature reuse
- 16-layer WRN outperforms 1000-layer ResNets, though with much larger # of parameters

#### ResNeXt

- Propose “cardinality” as a new factor in network design, apart from depth and width
- 
Claim that increasing cardinality is a better way to increase capacity than increasing depth or width + +#### DenseNet + +- Use Dense blocks between conv layers +- Fewer parameters than ResNet + +Next class: + +Transformer architectures diff --git a/pages/CSE559A/_meta.js b/pages/CSE559A/_meta.js index 703c544..f64bd58 100644 --- a/pages/CSE559A/_meta.js +++ b/pages/CSE559A/_meta.js @@ -13,4 +13,5 @@ export default { CSE559A_L8: "Computer Vision (Lecture 8)", CSE559A_L9: "Computer Vision (Lecture 9)", CSE559A_L10: "Computer Vision (Lecture 10)", + CSE559A_L11: "Computer Vision (Lecture 11)", }