diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..4c7db2f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +Dockerfile +.dockerignore +node_modules +npm-debug.log +README.md +.next +docker +.git \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..df543d2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,55 @@ +# Source: https://github.com/vercel/next.js/blob/canary/examples/with-docker-multi-env/docker/production/Dockerfile +# syntax=docker.io/docker/dockerfile:1 + +FROM node:18-alpine AS base + +# 1. Install dependencies only when needed +FROM base AS deps +# Check https://github.com/nodejs/docker-node/tree/b4117f9333da4138b03a546ec926ef50a31506c3#nodealpine to understand why libc6-compat might be needed. +RUN apk add --no-cache libc6-compat + +WORKDIR /app + +# Install dependencies based on the preferred package manager +COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* .npmrc* ./ +RUN \ + if [ -f yarn.lock ]; then yarn --frozen-lockfile; \ + elif [ -f package-lock.json ]; then npm ci; \ + elif [ -f pnpm-lock.yaml ]; then corepack enable pnpm && pnpm i; \ + else echo "Lockfile not found." && exit 1; \ + fi + + +# 2. Rebuild the source code only when needed +FROM base AS builder +WORKDIR /app +COPY --from=deps /app/node_modules ./node_modules +COPY . . +# This will do the trick, use the corresponding env file for each environment. +# COPY .env.production.sample .env.production +RUN npm run build + +# 3. 
Production image, copy all the files and run next +FROM base AS runner +WORKDIR /app + +ENV NODE_ENV=production + +RUN addgroup -g 1001 -S nodejs +RUN adduser -S nextjs -u 1001 + +COPY --from=builder /app/public ./public + +# Automatically leverage output traces to reduce image size +# https://nextjs.org/docs/advanced-features/output-file-tracing +COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./ +COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static + + +USER nextjs + +EXPOSE 3000 + +ENV PORT=3000 + +CMD HOSTNAME="0.0.0.0" node server.js \ No newline at end of file diff --git a/pages/CSE559A/CSE559A_L11.md b/pages/CSE559A/CSE559A_L11.md new file mode 100644 index 0000000..0fb03b3 --- /dev/null +++ b/pages/CSE559A/CSE559A_L11.md @@ -0,0 +1,141 @@ +# CSE559A Lecture 11 + +## Continue on Architecture of CNNs + +### AlexNet (2012-2013) + +Successor of LeNet-5, but with a few significant changes + +- Max pooling, ReLU nonlinearity +- Dropout regularization +- More data and bigger model (7 hidden layers, 650K units, 60M params) +- GPU implementation (50x speedup over CPU) + - Trained on two GPUs for a week + +#### Architecture for AlexNet + +- Input: 224x224x3 +- 11x11 conv, stride 4, 96 filters +- 3x3 max pooling, stride 2 +- 5x5 conv, 256 filters, padding 2 +- 3x3 max pooling, stride 2 +- 3x3 conv, 384 filters, padding 1 +- 3x3 conv, 384 filters, padding 1 +- 3x3 conv, 256 filters, padding 1 +- 3x3 max pooling, stride 2 +- 4096-unit FC, ReLU +- 4096-unit FC, ReLU +- 1000-unit FC, softmax + +#### Key points for AlexNet + +Most floating point operations occur in the convolutional layers. + +Most of the memory usage is in the early convolutional layers. + +Nearly all parameters are in the fully-connected layers. + +#### Further refinement (ZFNet, 2013) + +Best paper award at ILSVRC 2013. + +Nicely visualizes the feature maps. + +### VGGNet (2014) + +All the conv layers are 3x3 filters with stride 1 and padding 1.
Take advantage of pooling to reduce the spatial dimensionality. + +#### Architecture for VGGNet + +- Input: 224x224x3 +- 3x3 conv, 64 filters, padding 1 +- 3x3 conv, 64 filters, padding 1 +- 2x2 max pooling, stride 2 +- 3x3 conv, 128 filters, padding 1 +- 3x3 conv, 128 filters, padding 1 +- 2x2 max pooling, stride 2 +- 3x3 conv, 256 filters, padding 1 +- 3x3 conv, 256 filters, padding 1 +- 2x2 max pooling, stride 2 +- 3x3 conv, 512 filters, padding 1 +- 3x3 conv, 512 filters, padding 1 +- 3x3 conv, 512 filters, padding 1 +- 2x2 max pooling, stride 2 +- 3x3 conv, 512 filters, padding 1 +- 3x3 conv, 512 filters, padding 1 +- 3x3 conv, 512 filters, padding 1 +- 2x2 max pooling, stride 2 +- 4096-unit FC, ReLU +- 4096-unit FC, ReLU +- 1000-unit FC, softmax + +#### Key points for VGGNet + +- Sequence of deeper networks trained progressively +- Large receptive fields replaced by successive layers of 3x3 convs with ReLU in between + - A 7x7 conv takes $49K^2$ parameters, while three stacked 3x3 convs take $27K^2$ parameters + +#### Pretrained models + +- Use a pretrained network as feature extractor (removing the last layer and training a new linear layer) (transfer learning) + - Add RNN layers to generate captions +- Fine-tune the model for the new task (finetuning) + - Keep the earlier layers fixed and only train the new prediction layer + +### GoogLeNet (2014) + +Stem network at the start aggressively downsamples input.
+

#### Key points for GoogLeNet

- Parallel paths with different receptive field sizes and operations are means to capture sparse patterns of correlations in the stack of feature maps
- Use 1x1 convs to reduce dimensionality
- Use Global Average Pooling (GAP) to replace the fully connected layer
- Auxiliary classifiers to improve training
  - Training using loss at the end of the network didn't work well: network is too deep, gradients don't provide useful model updates
  - As a hack, attach "auxiliary classifiers" at several intermediate points in the network that also try to classify the image and receive loss
  - _GoogLeNet predates batch normalization; with batch normalization, the auxiliary classifiers were removed._

### ResNet (2015)

152 layers

[ResNet paper](https://arxiv.org/abs/1512.03385)

#### Key points for ResNet

- The residual module
  - Introduce `skip` or `shortcut` connections to avoid the degradation problem
  - Make it easy for network layers to represent the identity mapping
- Directly performing 3×3 convolutions with 256 feature maps at input and output:
  - $256 \times 256 \times 3 \times 3 \approx 600K$ operations
  - Using 1×1 convolutions to reduce 256 to 64 feature maps, followed by 3×3 convolutions, followed by 1×1 convolutions to expand back to 256 maps:
  - $256 \times 64 \times 1 \times 1 \approx 16K$
  - $64 \times 64 \times 3 \times 3 \approx 36K$
  - $64 \times 256 \times 1 \times 1 \approx 16K$
  - Total $\approx 70K$

_Possibly the first model with top-5 error rate better than human performance._

### Beyond ResNet (2016 and onward): Wide ResNet, ResNeXT, DenseNet

#### Wide ResNet

Reduce number of residual blocks, but increase number of feature maps in each block

- More parallelizable, better feature reuse
- 16-layer WRN outperforms 1000-layer ResNets, though with much larger # of parameters

#### ResNeXt

- Propose “cardinality” as a new factor in network design, apart from depth and width
- 
Claim that increasing cardinality is a better way to increase capacity than increasing depth or width + +#### DenseNet + +- Use Dense blocks between conv layers +- Fewer parameters than ResNet + +Next class: + +Transformer architectures diff --git a/pages/CSE559A/_meta.js b/pages/CSE559A/_meta.js index 703c544..f64bd58 100644 --- a/pages/CSE559A/_meta.js +++ b/pages/CSE559A/_meta.js @@ -13,4 +13,5 @@ export default { CSE559A_L8: "Computer Vision (Lecture 8)", CSE559A_L9: "Computer Vision (Lecture 9)", CSE559A_L10: "Computer Vision (Lecture 10)", + CSE559A_L11: "Computer Vision (Lecture 11)", }