Build a NestJS API for async PDF processing with S3, SQS, and DynamoDB on AWS EC2
Build a NestJS API that serves as a lightweight ingestion/control layer for async PDF processing. The API runs on EC2 (behind ALB), accepts PDF jobs, and delegates processing asynchronously to a worker (ECS Fargate). Files are uploaded directly from client to S3 via presigned URLs, and the API manages job status via SQS + DynamoDB.
```
Client → POST /upload-url → presigned PUT → S3 (inbox/uploads/)
Client → POST /extract {jobId, s3Key} → API → DynamoDB(status=queued) + SQS(Message)
Worker (ECS) reads SQS, fetches PDF from S3 inbox, extracts text, writes result to S3 results, sets DDB status=done
Client → GET /status/:jobId (optional GET /download/:jobId → presigned GET)
```
1. **POST /upload-url** - Generate presigned URL for S3 upload
- Body: `{ filename: string }`
- Response: `{ jobId, key, url }` (presigned PUT for S3 inbox/uploads/)
2. **POST /extract** - Queue PDF extraction job
- Body: `{ jobId: string, s3Key: string, ocr?: boolean, language?: string }`
- Writes DDB `{status:'queued'}` and sends SQS message
- Response: `{ jobId }` (202 Accepted)
3. **GET /status/:jobId** - Check job status
- Response: `{ jobId, status, resultKey?, error? }`
4. **GET /download/:jobId** - Get result download URL
- Response: `{ jobId, url }` (presigned GET for results/)
- Only works when status='done'
5. **GET /health** - ALB health check
- Response: `{ ok: true }`
6. **GET /asset?key=results/...** - Download asset with signed URL
- Query: `key` (must start with `results/`)
- Response: `{ url }` (presigned GET URL)
Create `src/dto/upload-url.dto.ts`:
```typescript
import { IsString, IsNotEmpty } from 'class-validator';
/**
 * Request body for `POST /upload-url`.
 *
 * Validated by the global `ValidationPipe`; the property is populated at
 * runtime from the request body, hence the definite-assignment marker (`!`),
 * which keeps the class compiling under `strictPropertyInitialization`.
 */
export class UploadUrlDto {
  /** Client-supplied file name; must be a non-empty string. */
  @IsString()
  @IsNotEmpty()
  filename!: string;
}
```
Create `src/dto/extract.dto.ts`:
```typescript
import { IsString, IsNotEmpty, IsOptional, IsBoolean } from 'class-validator';
/**
 * Request body for `POST /extract`.
 *
 * Required properties carry the definite-assignment marker (`!`) because they
 * are populated at runtime from the request body rather than in a
 * constructor; this keeps the class compiling under
 * `strictPropertyInitialization`.
 */
export class ExtractDto {
  /** Identifier of the job to queue; must be a non-empty string. */
  @IsString()
  @IsNotEmpty()
  jobId!: string;

  /** S3 object key of the uploaded PDF; must be a non-empty string. */
  @IsString()
  @IsNotEmpty()
  s3Key!: string;

  /** Optional OCR flag, forwarded to the worker in the queue message. */
  @IsOptional()
  @IsBoolean()
  ocr?: boolean;

  /** Optional language hint, forwarded to the worker in the queue message. */
  @IsOptional()
  @IsString()
  language?: string;
}
```
Update `src/app.service.ts` with AWS SDK v3 integration:
```typescript
import { Injectable, BadRequestException } from '@nestjs/common';
import { S3Client, PutObjectCommand, GetObjectCommand, HeadObjectCommand } from '@aws-sdk/client-s3';
import { SQSClient, SendMessageCommand } from '@aws-sdk/client-sqs';
import { DynamoDBClient, PutItemCommand, GetItemCommand } from '@aws-sdk/client-dynamodb';
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
import { v4 as uuidv4 } from 'uuid';
/**
 * Ingestion/control service for the async PDF pipeline.
 *
 * Issues presigned S3 URLs, records jobs in DynamoDB, publishes them to SQS,
 * and reports job status. Resource names are read from the environment
 * (`INBOX_BUCKET`, `RESULTS_BUCKET`, `QUEUE_URL`, `TABLE_NAME`) at call time.
 */
@Injectable()
export class AppService {
  /** Lifetime of every presigned URL issued by this service, in seconds. */
  private static readonly PRESIGN_TTL_SECONDS = 900;

  private readonly s3Client: S3Client;
  private readonly sqsClient: SQSClient;
  private readonly dynamoClient: DynamoDBClient;

  constructor() {
    // `||` (not `??`) so that an empty AWS_REGION also falls back to the default.
    const region = process.env.AWS_REGION || 'eu-north-1';
    this.s3Client = new S3Client({ region });
    this.sqsClient = new SQSClient({ region });
    this.dynamoClient = new DynamoDBClient({ region });
  }

  /**
   * Creates a new job id and a presigned PUT URL for uploading the PDF.
   *
   * @param filename Client-supplied file name. Every character outside
   *   `[a-zA-Z0-9._-]` is replaced with `_` before it is embedded in the key.
   * @returns The generated job id, the S3 key to upload to, and the presigned URL.
   * @throws Error when `INBOX_BUCKET` is not configured.
   */
  async createUploadUrl(
    filename: string,
  ): Promise<{ jobId: string; key: string; url: string }> {
    const jobId = uuidv4();
    const safeName = filename.replace(/[^a-zA-Z0-9._-]/g, '_');
    const key = `uploads/${jobId}_${safeName}`;

    const command = new PutObjectCommand({
      Bucket: this.requireEnv('INBOX_BUCKET'),
      Key: key,
      ContentType: 'application/pdf',
    });
    const url = await getSignedUrl(this.s3Client, command, {
      expiresIn: AppService.PRESIGN_TTL_SECONDS,
    });

    return { jobId, key, url };
  }

  /**
   * Records a job as `queued` in DynamoDB and publishes it to SQS.
   *
   * @param jobId Job identifier used as the DynamoDB key.
   * @param s3Key Key of the uploaded PDF; must start with `uploads/` and exist
   *   in the inbox bucket.
   * @param ocr Optional OCR flag forwarded to the worker.
   * @param language Optional language hint forwarded to the worker.
   * @returns The job id that was queued.
   * @throws BadRequestException when the key has the wrong prefix or the
   *   object does not exist.
   * @throws Error when `INBOX_BUCKET`, `TABLE_NAME` or `QUEUE_URL` is not configured.
   */
  async enqueueJob(
    jobId: string,
    s3Key: string,
    ocr?: boolean,
    language?: string,
  ): Promise<{ jobId: string }> {
    if (!s3Key.startsWith('uploads/')) {
      throw new BadRequestException('s3Key must start with uploads/');
    }

    // Resolve all configuration before touching AWS so that a missing
    // variable cannot leave a DynamoDB record without a matching SQS message.
    const inboxBucket = this.requireEnv('INBOX_BUCKET');
    const tableName = this.requireEnv('TABLE_NAME');
    const queueUrl = this.requireEnv('QUEUE_URL');

    // Verify the object exists in S3 before accepting the job.
    try {
      await this.s3Client.send(
        new HeadObjectCommand({ Bucket: inboxBucket, Key: s3Key }),
      );
    } catch (error: unknown) {
      // The catch variable is `unknown` under strict settings; narrow it
      // before reading `name`.
      if (error instanceof Error && error.name === 'NotFound') {
        throw new BadRequestException('Object not found');
      }
      throw error;
    }

    await this.dynamoClient.send(
      new PutItemCommand({
        TableName: tableName,
        Item: {
          jobId: { S: jobId },
          status: { S: 'queued' },
          s3Key: { S: s3Key },
          createdAt: { S: new Date().toISOString() },
        },
      }),
    );

    // NOTE(review): if this send fails, the record written above stays
    // `queued`; nothing here rolls it back.
    await this.sqsClient.send(
      new SendMessageCommand({
        QueueUrl: queueUrl,
        MessageBody: JSON.stringify({
          jobId,
          s3Key,
          bucket: inboxBucket,
          // Passed through as-is; JSON.stringify drops the property when unset.
          resultsBucket: process.env.RESULTS_BUCKET,
          language,
          options: { ocr, language },
        }),
      }),
    );

    return { jobId };
  }

  /**
   * Looks up a job record in DynamoDB.
   *
   * @param jobId Job identifier.
   * @returns The job id plus the `status`, `resultKey` and `error` string
   *   attributes of the record (each `undefined` when absent).
   * @throws BadRequestException when no record exists for `jobId`.
   * @throws Error when `TABLE_NAME` is not configured.
   */
  async getStatus(jobId: string): Promise<{
    jobId: string;
    status: string | undefined;
    resultKey: string | undefined;
    error: string | undefined;
  }> {
    const result = await this.dynamoClient.send(
      new GetItemCommand({
        TableName: this.requireEnv('TABLE_NAME'),
        Key: { jobId: { S: jobId } },
      }),
    );

    // NOTE(review): a missing job is reported as 400 rather than 404; kept
    // unchanged so existing clients see the same status code.
    if (!result.Item) {
      throw new BadRequestException('Job not found');
    }

    return {
      jobId,
      status: result.Item.status?.S,
      resultKey: result.Item.resultKey?.S,
      error: result.Item.error?.S,
    };
  }

  /**
   * Returns a presigned GET URL for the result of a finished job.
   *
   * @param jobId Job identifier.
   * @returns The job id and the presigned download URL.
   * @throws BadRequestException when the job is unknown, is not `done`, or
   *   has no `resultKey`.
   * @throws Error when `TABLE_NAME` or `RESULTS_BUCKET` is not configured.
   */
  async getResultDownloadUrl(
    jobId: string,
  ): Promise<{ jobId: string; url: string }> {
    const job = await this.getStatus(jobId);
    if (job.status !== 'done') {
      throw new BadRequestException('Job not completed');
    }
    if (!job.resultKey) {
      throw new BadRequestException('No result available');
    }

    const url = await this.presignResultDownload(job.resultKey);
    return { jobId, url };
  }

  /**
   * Returns a presigned GET URL for an object in the results bucket.
   *
   * @param key Object key; must start with `results/`.
   * @returns The presigned download URL.
   * @throws BadRequestException when the key has the wrong prefix.
   * @throws Error when `RESULTS_BUCKET` is not configured.
   */
  async getAssetSignedUrl(key: string): Promise<{ url: string }> {
    if (!key.startsWith('results/')) {
      throw new BadRequestException('Key must start with results/');
    }

    const url = await this.presignResultDownload(key);
    return { url };
  }

  /** Static payload returned by the health-check route. */
  health(): { ok: boolean } {
    return { ok: true };
  }

  /** Presigns a GET for `key` in the results bucket. */
  private async presignResultDownload(key: string): Promise<string> {
    const command = new GetObjectCommand({
      Bucket: this.requireEnv('RESULTS_BUCKET'),
      Key: key,
    });
    return getSignedUrl(this.s3Client, command, {
      expiresIn: AppService.PRESIGN_TTL_SECONDS,
    });
  }

  /** Reads a mandatory environment variable, failing fast with a clear message. */
  private requireEnv(name: string): string {
    const value = process.env[name];
    if (!value) {
      throw new Error(`Missing required environment variable: ${name}`);
    }
    return value;
  }
}
```
Update `src/app.controller.ts`:
```typescript
import { BadRequestException, Body, Controller, Get, HttpCode, HttpStatus, Param, Post, Query } from '@nestjs/common';
import { AppService } from './app.service';
import { UploadUrlDto } from './dto/upload-url.dto';
import { ExtractDto } from './dto/extract.dto';
/**
 * HTTP routes of the ingestion API. Each handler validates its input and
 * delegates to {@link AppService}.
 */
@Controller()
export class AppController {
  constructor(private readonly appService: AppService) {}

  /** `POST /upload-url` — returns `{ jobId, key, url }` for a new upload. */
  @Post('upload-url')
  async uploadUrl(@Body() dto: UploadUrlDto) {
    return this.appService.createUploadUrl(dto.filename);
  }

  /**
   * `POST /extract` — queues an extraction job and returns `{ jobId }`.
   *
   * Responds with 202 Accepted because the work is only queued here;
   * without `@HttpCode` Nest would answer POST handlers with 201.
   */
  @Post('extract')
  @HttpCode(HttpStatus.ACCEPTED)
  async extract(@Body() dto: ExtractDto) {
    return this.appService.enqueueJob(dto.jobId, dto.s3Key, dto.ocr, dto.language);
  }

  /**
   * `GET /status/:jobId` — returns the stored job status.
   *
   * @throws BadRequestException when `jobId` is empty.
   */
  @Get('status/:jobId')
  async status(@Param('jobId') jobId: string) {
    if (!jobId) {
      throw new BadRequestException('jobId is required');
    }
    return this.appService.getStatus(jobId);
  }

  /**
   * `GET /download/:jobId` — returns a presigned URL for a finished job.
   *
   * @throws BadRequestException when `jobId` is empty.
   */
  @Get('download/:jobId')
  async download(@Param('jobId') jobId: string) {
    if (!jobId) {
      throw new BadRequestException('jobId is required');
    }
    return this.appService.getResultDownloadUrl(jobId);
  }

  /**
   * `GET /asset?key=...` — returns a presigned URL for a results object.
   *
   * @throws BadRequestException when `key` is missing, empty, or not a string.
   */
  @Get('asset')
  async asset(@Query('key') key: string) {
    // A repeated `key` parameter can be parsed into a non-string value, which
    // would otherwise fail inside the service when it calls `startsWith`.
    if (typeof key !== 'string' || key.length === 0) {
      throw new BadRequestException('key query parameter is required');
    }
    return this.appService.getAssetSignedUrl(key);
  }

  /** `GET /health` — health-check route. */
  @Get('health')
  health() {
    return this.appService.health();
  }
}
```
Update `src/main.ts`:
```typescript
import { NestFactory } from '@nestjs/core';
import { ValidationPipe } from '@nestjs/common';
import { AppModule } from './app.module';
/**
 * Creates the Nest application, installs the global validation pipe
 * (`whitelist` and `transform` enabled) and listens on port 3000 on all
 * interfaces.
 */
async function bootstrap(): Promise<void> {
  const app = await NestFactory.create(AppModule);
  app.useGlobalPipes(
    new ValidationPipe({
      whitelist: true,
      transform: true,
    }),
  );
  await app.listen(3000, '0.0.0.0');
}

// Handle start-up failures explicitly instead of leaving a floating promise:
// log the cause and exit with a non-zero code.
bootstrap().catch((error: unknown) => {
  console.error('Failed to start application', error);
  process.exit(1);
});
```
```bash
# AWS SDK v3 clients, the S3 presigner, and uuid for job ids
npm install @aws-sdk/client-s3 @aws-sdk/client-sqs @aws-sdk/client-dynamodb @aws-sdk/s3-request-presigner uuid
# Required by the DTO decorators and by the global ValidationPipe
npm install class-validator class-transformer
# Type declarations for uuid
npm install --save-dev @types/uuid
```
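Create a `.env` file for local development (never commit it). NestJS does not read `.env` on its own, so load it when starting the app — for example with `node --env-file=.env` (Node 20.6+) or `@nestjs/config`: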
```
# Region for the S3, SQS and DynamoDB clients (the service falls back to eu-north-1 when unset)
AWS_REGION=eu-north-1
# Bucket that receives uploads (presigned PUT and the HeadObject existence check)
INBOX_BUCKET=leitnerai-inbox-7634-8705-3303
# Bucket that presigned download URLs point at
RESULTS_BUCKET=leitnerai-results-7634-8705-3303
# Queue that extraction jobs are sent to
QUEUE_URL=https://sqs.eu-north-1.amazonaws.com/763487053303/leitnerai-jobs
# DynamoDB table holding job records, keyed by jobId
TABLE_NAME=leitnerai-jobs
```
On EC2, these are set via `user_data.sh` in `/etc/environment`.
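EC2 instance role needs:
- `s3:PutObject` on the inbox bucket (`uploads/*`) — presigned upload URLs are signed with the role's credentials
- `s3:GetObject` on the inbox bucket (`uploads/*`) for the `HeadObject` existence check, plus `s3:ListBucket` on that bucket so a missing key is reported as 404 rather than 403
- `s3:GetObject` on the results bucket for presigned download URLs
- `sqs:SendMessage` on the jobs queue
- `dynamodb:PutItem` and `dynamodb:GetItem` on the jobs table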
Build and verify:
```bash
# Run the project's build script
npm run build
# Run the project's lint script
npm run lint
```
Start development server:
```bash
# Run the project's start:dev script
npm run start:dev
```
Test endpoints:
```bash
# Health check (same route the ALB uses)
curl http://localhost:3000/health
# Request a presigned upload URL
curl --request POST http://localhost:3000/upload-url \
  --header "Content-Type: application/json" \
  --data '{"filename":"test.pdf"}'
# Queue an extraction job for an uploaded object
curl --request POST http://localhost:3000/extract \
  --header "Content-Type: application/json" \
  --data '{"jobId":"<uuid>","s3Key":"uploads/<key>.pdf","ocr":true}'
# Poll the job status
curl http://localhost:3000/status/<jobId>
# Fetch the presigned download URL once the job is done
curl http://localhost:3000/download/<jobId>
```
The API is deployed to EC2 behind an ALB, which uses `GET /health` as its health check.
Production build:
```bash
# Clean install from the lockfile
npm ci
# Run the project's build script
npm run build
# Drop devDependencies from node_modules (`--production` is deprecated in favour of `--omit=dev`)
npm prune --omit=dev
```
Leave a review
No reviews yet. Be the first to review this skill!
# Download SKILL.md from killerskills.ai/api/skills/nestjs-pdf-processing-api/raw