jbilcke-hf HF staff committed on
Commit
4c069c7
•
1 Parent(s): 95a4e14

not ready yet

Browse files
src/app/engine/render.ts CHANGED
@@ -369,7 +369,7 @@ export async function newRender({
369
  },
370
  body: JSON.stringify({
371
  prompt,
372
- // negativePrompt, unused for now
373
 
374
  // for a future version of the comic factory
375
  identityImage: "",
 
369
  },
370
  body: JSON.stringify({
371
  prompt,
372
+ negativePrompt: "speech, bubble, speech bubble, caption",
373
 
374
  // for a future version of the comic factory
375
  identityImage: "",
src/app/interface/panel/index.tsx CHANGED
@@ -96,12 +96,16 @@ export function Panel({
96
 
97
  let delay = enableRateLimiter ? (1000 + (500 * panelIndex)) : 1000
98
 
 
 
99
  const addSpeechBubble = async () => {
100
  if (!renderedRef.current) { return }
101
 
102
  // story generation failed
103
  if (speech.trim() === "...") { return }
104
 
 
 
105
  console.log('Generating speech bubble...')
106
  try {
107
  const result = await injectSpeechBubbleInTheBackground({
 
96
 
97
  let delay = enableRateLimiter ? (1000 + (500 * panelIndex)) : 1000
98
 
99
+ const isBeta = false
100
+
101
  const addSpeechBubble = async () => {
102
  if (!renderedRef.current) { return }
103
 
104
  // story generation failed
105
  if (speech.trim() === "...") { return }
106
 
107
+ if (!isBeta) { return }
108
+
109
  console.log('Generating speech bubble...')
110
  try {
111
  const result = await injectSpeechBubbleInTheBackground({
src/lib/bubble/injectSpeechBubbleInTheBackground.ts CHANGED
@@ -1,9 +1,21 @@
1
  import { ImageSegmenter, FilesetResolver } from "@mediapipe/tasks-vision"
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  export async function injectSpeechBubbleInTheBackground(params: {
4
  inputImageInBase64: string;
5
  text?: string;
6
- shape?: "oval" | "rectangular" | "cloud" | "thought";
7
  line?: "handdrawn" | "straight" | "bubble" | "chaotic";
8
  font?: string;
9
  debug?: boolean;
@@ -17,22 +29,17 @@ export async function injectSpeechBubbleInTheBackground(params: {
17
  debug = false,
18
  } = params;
19
 
20
- // If no text is provided, return the original image
21
  if (!text) {
22
  return inputImageInBase64;
23
  }
24
 
25
- // Load the image
26
  const image = await loadImage(inputImageInBase64);
27
-
28
- // Set up canvas
29
  const canvas = document.createElement('canvas');
30
  canvas.width = image.width;
31
  canvas.height = image.height;
32
  const ctx = canvas.getContext('2d')!;
33
  ctx.drawImage(image, 0, 0);
34
 
35
- // Set up MediaPipe Image Segmenter
36
  const vision = await FilesetResolver.forVisionTasks(
37
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
38
  );
@@ -46,30 +53,28 @@ export async function injectSpeechBubbleInTheBackground(params: {
46
  });
47
 
48
  const segmentationResult = imageSegmenter.segment(image);
49
- let characterBoundingBox: { top: number, left: number, width: number, height: number } | null = null;
50
 
51
  if (segmentationResult.categoryMask) {
52
  const mask = segmentationResult.categoryMask.getAsUint8Array();
53
- const detectedItems = analyzeSegmentationMask(mask, image.width, image.height);
54
- console.log("Detected items:", detectedItems);
55
-
56
- if (detectedItems.length > 0) {
57
- characterBoundingBox = findCharacterBoundingBox(mask, image.width, image.height);
58
- }
59
 
60
  if (debug) {
61
  drawSegmentationMask(ctx, mask, image.width, image.height);
62
  }
63
  }
64
 
65
- const bubbleLocation = characterBoundingBox
66
- ? { x: characterBoundingBox.left + characterBoundingBox.width / 2, y: characterBoundingBox.top }
67
- : { x: image.width / 2, y: image.height / 2 };
68
 
69
- drawSpeechBubble(ctx, bubbleLocation, text, shape, line, font, !!characterBoundingBox, image.width, image.height, characterBoundingBox);
 
 
 
70
 
71
  return canvas.toDataURL('image/png');
72
  }
 
73
  function loadImage(base64: string): Promise<HTMLImageElement> {
74
  return new Promise((resolve, reject) => {
75
  const img = new Image();
@@ -79,7 +84,26 @@ function loadImage(base64: string): Promise<HTMLImageElement> {
79
  });
80
  }
81
 
82
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  function analyzeSegmentationMask(mask: Uint8Array, width: number, height: number): string[] {
85
  const categories = new Set<number>();
@@ -91,21 +115,47 @@ function analyzeSegmentationMask(mask: Uint8Array, width: number, height: number
91
  return Array.from(categories).map(c => `unknown-${c}`);
92
  }
93
 
94
- function findMainCharacterLocation(mask: Uint8Array, width: number, height: number): { x: number, y: number } {
95
- let sumX = 0, sumY = 0, count = 0;
96
- for (let y = 0; y < height; y++) {
97
- for (let x = 0; x < width; x++) {
98
- const index = y * width + x;
99
- if (mask[index] > 0) {
100
- sumX += x;
101
- sumY += y;
102
- count++;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  }
104
- }
 
 
105
  }
106
- return count > 0 ? { x: sumX / count, y: sumY / count } : { x: width / 2, y: height / 2 };
 
107
  }
108
 
 
 
 
 
109
 
110
  function drawSegmentationMask(ctx: CanvasRenderingContext2D, mask: Uint8Array, width: number, height: number) {
111
  const imageData = ctx.getImageData(0, 0, width, height);
@@ -159,45 +209,48 @@ function drawSpeechBubble(
159
  shape: "oval" | "rectangular" | "cloud" | "thought",
160
  line: "handdrawn" | "straight" | "bubble" | "chaotic",
161
  font: string,
162
- characterDetected: boolean,
163
  imageWidth: number,
164
- imageHeight: number,
165
- characterBoundingBox: { top: number, left: number, width: number, height: number } | null
166
  ) {
167
  const bubbleWidth = Math.min(300, imageWidth * 0.4);
168
  const bubbleHeight = Math.min(150, imageHeight * 0.3);
169
  const padding = 20;
170
 
171
- const fontSize = Math.max(15, Math.min(30, 500 / text.length)); // Increased font size by 25%
172
  ctx.font = `${fontSize}px ${font}`;
173
 
174
- const wrappedText = wrapText(ctx, text, bubbleWidth - padding * 2);
175
- const textDimensions = measureTextDimensions(ctx, wrappedText);
176
 
177
  const finalWidth = Math.max(bubbleWidth, textDimensions.width + padding * 2);
178
  const finalHeight = Math.max(bubbleHeight, textDimensions.height + padding * 2);
179
 
180
  const bubbleLocation = {
181
- x: Math.max(finalWidth / 2, Math.min(imageWidth - finalWidth / 2, location.x)),
182
- y: Math.max(finalHeight / 2, Math.min(imageHeight - finalHeight / 2, location.y - finalHeight))
183
  };
184
 
185
  ctx.fillStyle = 'white';
186
  ctx.strokeStyle = 'black';
187
  ctx.lineWidth = 2;
188
 
 
 
 
 
 
 
 
 
189
  ctx.beginPath();
190
- drawBubbleShape(ctx, shape, bubbleLocation, finalWidth, finalHeight, location);
191
  ctx.fill();
192
  ctx.stroke();
193
 
194
- applyLineStyle(ctx, line);
195
-
196
- const tailTarget = characterBoundingBox
197
- ? { x: characterBoundingBox.left + characterBoundingBox.width / 2, y: characterBoundingBox.top + characterBoundingBox.height * 0.2 }
198
- : location;
199
-
200
- drawTail(ctx, bubbleLocation, finalWidth, finalHeight, tailTarget, shape);
201
 
202
  ctx.fillStyle = 'black';
203
  ctx.textAlign = 'center';
@@ -211,7 +264,7 @@ function drawBubbleShape(
211
  bubbleLocation: { x: number, y: number },
212
  width: number,
213
  height: number,
214
- tailTarget: { x: number, y: number }
215
  ) {
216
  switch (shape) {
217
  case "oval":
@@ -295,79 +348,45 @@ function drawThoughtBubble(ctx: CanvasRenderingContext2D, location: { x: number,
295
  function drawTail(
296
  ctx: CanvasRenderingContext2D,
297
  bubbleLocation: { x: number, y: number },
298
- width: number,
299
- height: number,
300
  tailTarget: { x: number, y: number },
301
  shape: string
302
  ) {
303
- const tailLength = Math.min(50, height / 2);
304
- const startX = bubbleLocation.x + (tailTarget.x > bubbleLocation.x ? width / 4 : -width / 4);
305
- const startY = bubbleLocation.y + height / 2;
306
-
307
  ctx.beginPath();
308
- ctx.moveTo(startX, startY);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
- if (shape === "thought") {
311
- const bubbleCount = 3;
312
- for (let i = 0; i < bubbleCount; i++) {
313
- const t = (i + 1) / (bubbleCount + 1);
314
- const x = startX + (tailTarget.x - startX) * t;
315
- const y = startY + (tailTarget.y - startY) * t;
316
- const radius = 5 * (1 - t);
317
- ctx.lineTo(x - radius, y);
318
- ctx.arc(x, y, radius, 0, Math.PI * 2);
319
- }
320
- } else {
321
- const controlX = (startX + tailTarget.x) / 2;
322
- const controlY = (startY + tailTarget.y + 20) / 2;
323
- ctx.quadraticCurveTo(controlX, controlY, tailTarget.x, tailTarget.y);
324
- ctx.quadraticCurveTo(controlX, controlY, startX + (tailTarget.x > bubbleLocation.x ? -10 : 10), startY);
325
- }
326
  ctx.closePath();
327
  ctx.fill();
328
  ctx.stroke();
329
  }
330
 
331
- function findCharacterBoundingBox(mask: Uint8Array, width: number, height: number): { top: number, left: number, width: number, height: number } {
332
- let minX = width, minY = height, maxX = 0, maxY = 0;
333
- for (let y = 0; y < height; y++) {
334
- for (let x = 0; x < width; x++) {
335
- const index = y * width + x;
336
- if (mask[index] > 0) {
337
- minX = Math.min(minX, x);
338
- minY = Math.min(minY, y);
339
- maxX = Math.max(maxX, x);
340
- maxY = Math.max(maxY, y);
341
- }
342
- }
343
- }
344
- return {
345
- top: minY,
346
- left: minX,
347
- width: maxX - minX,
348
- height: maxY - minY
349
- };
350
- }
351
-
352
- function applyLineStyle(ctx: CanvasRenderingContext2D, style: string) {
353
- switch (style) {
354
- case "handdrawn":
355
- ctx.setLineDash([5, 5]);
356
- break;
357
- case "straight":
358
- ctx.setLineDash([]);
359
- break;
360
- case "bubble":
361
- ctx.setLineDash([0, 10]);
362
- ctx.lineCap = "round";
363
- break;
364
- case "chaotic":
365
- ctx.setLineDash([10, 5, 2, 5]);
366
- break;
367
- }
368
- }
369
-
370
- function wrapText(ctx: CanvasRenderingContext2D, text: string, maxWidth: number): string[] {
371
  const words = text.split(' ');
372
  const lines: string[] = [];
373
  let currentLine = '';
@@ -376,7 +395,7 @@ function wrapText(ctx: CanvasRenderingContext2D, text: string, maxWidth: number)
376
  const testLine = currentLine + (currentLine ? ' ' : '') + word;
377
  const metrics = ctx.measureText(testLine);
378
 
379
- if (metrics.width > maxWidth || word.endsWith('.') || word.endsWith(',')) {
380
  lines.push(currentLine);
381
  currentLine = word;
382
  } else {
@@ -391,10 +410,8 @@ function wrapText(ctx: CanvasRenderingContext2D, text: string, maxWidth: number)
391
  return lines;
392
  }
393
 
394
-
395
- function measureTextDimensions(ctx: CanvasRenderingContext2D, lines: string[]): { width: number, height: number } {
396
  let maxWidth = 0;
397
- const lineHeight = ctx.measureText('M').width * 1.2;
398
  const height = lineHeight * lines.length;
399
 
400
  for (const line of lines) {
@@ -405,15 +422,13 @@ function measureTextDimensions(ctx: CanvasRenderingContext2D, lines: string[]):
405
  return { width: maxWidth, height };
406
  }
407
 
408
- function drawFormattedText(ctx: CanvasRenderingContext2D, lines: string[], x: number, y: number, maxWidth: number, fontSize: number) {
409
- const lineHeight = fontSize * 1.2;
410
  const totalHeight = lineHeight * lines.length;
411
  let startY = y - totalHeight / 2 + lineHeight / 2;
412
 
413
  for (let i = 0; i < lines.length; i++) {
414
  const line = lines[i];
415
  const lineY = startY + i * lineHeight;
416
- const maxLineWidth = Math.min(maxWidth, maxWidth * (1 - Math.abs(i - (lines.length - 1) / 2) / lines.length));
417
- ctx.fillText(line, x, lineY, maxLineWidth);
418
  }
419
  }
 
1
  import { ImageSegmenter, FilesetResolver } from "@mediapipe/tasks-vision"
2
 
3
/**
 * Axis-aligned rectangle in image pixel coordinates.
 *
 * `top`/`left` locate the upper-left corner; `width`/`height` are the extents
 * measured from that corner.
 */
interface BoundingBox {
  top: number;
  left: number;
  width: number;
  height: number;
}
9
+
10
+ /**
11
+ * Injects speech bubbles into the background of an image.
12
+ * @param params - The parameters for injecting speech bubbles.
13
+ * @returns A Promise that resolves to a base64-encoded string of the modified image.
14
+ */
15
  export async function injectSpeechBubbleInTheBackground(params: {
16
  inputImageInBase64: string;
17
  text?: string;
18
+ shape?: "oval" | "rectangular" | "cloud" | "thought";
19
  line?: "handdrawn" | "straight" | "bubble" | "chaotic";
20
  font?: string;
21
  debug?: boolean;
 
29
  debug = false,
30
  } = params;
31
 
 
32
  if (!text) {
33
  return inputImageInBase64;
34
  }
35
 
 
36
  const image = await loadImage(inputImageInBase64);
 
 
37
  const canvas = document.createElement('canvas');
38
  canvas.width = image.width;
39
  canvas.height = image.height;
40
  const ctx = canvas.getContext('2d')!;
41
  ctx.drawImage(image, 0, 0);
42
 
 
43
  const vision = await FilesetResolver.forVisionTasks(
44
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
45
  );
 
53
  });
54
 
55
  const segmentationResult = imageSegmenter.segment(image);
56
+ let characterBoundingBox: BoundingBox | null = null;
57
 
58
  if (segmentationResult.categoryMask) {
59
  const mask = segmentationResult.categoryMask.getAsUint8Array();
60
+ characterBoundingBox = findCharacterBoundingBox(mask, image.width, image.height);
 
 
 
 
 
61
 
62
  if (debug) {
63
  drawSegmentationMask(ctx, mask, image.width, image.height);
64
  }
65
  }
66
 
67
+ const bubbles = splitTextIntoBubbles(text);
68
+ const bubbleLocations = calculateBubbleLocations(bubbles.length, image.width, image.height, characterBoundingBox);
 
69
 
70
+ bubbles.forEach((bubbleText, index) => {
71
+ const bubbleLocation = bubbleLocations[index];
72
+ drawSpeechBubble(ctx, bubbleLocation, bubbleText, shape, line, font, characterBoundingBox, image.width, image.height);
73
+ });
74
 
75
  return canvas.toDataURL('image/png');
76
  }
77
+
78
  function loadImage(base64: string): Promise<HTMLImageElement> {
79
  return new Promise((resolve, reject) => {
80
  const img = new Image();
 
84
  });
85
  }
86
 
87
/**
 * Computes the axis-aligned bounding box of every non-zero pixel in a
 * segmentation mask.
 *
 * @param mask - Row-major mask with one byte per pixel; any value greater
 *   than 0 is counted as part of the character.
 * @param width - Mask width in pixels.
 * @param height - Mask height in pixels.
 * @returns The box enclosing all non-zero pixels (`width`/`height` are
 *   `max - min`, so a single pixel yields a 0x0 box), or `null` when the mask
 *   contains no non-zero pixel.
 */
function findCharacterBoundingBox(mask: Uint8Array, width: number, height: number): BoundingBox | null {
  let minX = width;
  let minY = height;
  let maxX = -1;
  let maxY = -1;

  for (let y = 0; y < height; y++) {
    const rowStart = y * width;
    for (let x = 0; x < width; x++) {
      if (mask[rowStart + x] > 0) {
        if (x < minX) minX = x;
        if (y < minY) minY = y;
        if (x > maxX) maxX = x;
        if (y > maxY) maxY = y;
      }
    }
  }

  // Nothing was segmented: report "no character" instead of a box with
  // negative dimensions anchored at the bottom-right corner.
  if (maxX < 0) {
    return null;
  }

  return {
    top: minY,
    left: minX,
    width: maxX - minX,
    height: maxY - minY
  };
}
107
 
108
  function analyzeSegmentationMask(mask: Uint8Array, width: number, height: number): string[] {
109
  const categories = new Set<number>();
 
115
  return Array.from(categories).map(c => `unknown-${c}`);
116
  }
117
 
118
/**
 * Splits a text into sentence-sized chunks, one per speech bubble.
 *
 * A chunk ends at a run of `.`, `!` or `?`. A trailing fragment with no
 * closing punctuation is kept as its own chunk rather than being discarded.
 *
 * @param text - The full text to split.
 * @returns The trimmed, non-empty chunks in reading order. Text made only of
 *   punctuation is returned as a single chunk; whitespace-only text yields
 *   an empty array.
 */
function splitTextIntoBubbles(text: string): string[] {
  // Second alternative captures the unterminated tail the first one misses.
  const sentences = text.match(/[^.!?]+[.!?]+|[^.!?]+$/g) || [text];
  return sentences
    .map(sentence => sentence.trim())
    .filter(sentence => sentence.length > 0);
}
122
+
123
/**
 * Picks one anchor point per bubble, stacking bubbles top to bottom and
 * choosing a random horizontal position for each.
 *
 * The vertical position is fixed per bubble index; only the horizontal
 * position is re-rolled when the point falls inside the character box. The
 * overlap test is on the anchor point only, not on the bubble's extents.
 *
 * @param bubbleCount - Number of locations to produce.
 * @param imageWidth - Image width in pixels.
 * @param imageHeight - Image height in pixels.
 * @param characterBoundingBox - Area to avoid, or `null` for no constraint.
 * @returns `bubbleCount` points, each kept 50px away from the image edges.
 *   If every candidate for a bubble overlaps the box, the last candidate is
 *   used and a warning is logged.
 */
function calculateBubbleLocations(
  bubbleCount: number,
  imageWidth: number,
  imageHeight: number,
  characterBoundingBox: BoundingBox | null
): { x: number, y: number }[] {
  const locations: { x: number, y: number }[] = [];
  const padding = 50;
  const availableWidth = imageWidth - padding * 2;
  const availableHeight = imageHeight - padding * 2;
  const maxAttempts = 50;

  for (let i = 0; i < bubbleCount; i++) {
    // y does not depend on the attempt, so compute it once per bubble.
    const y = (i / bubbleCount) * availableHeight + padding;
    let x = Math.random() * availableWidth + padding;

    if (characterBoundingBox) {
      let attempts = 1;
      while (attempts < maxAttempts && isOverlapping({ x, y }, characterBoundingBox)) {
        x = Math.random() * availableWidth + padding;
        attempts++;
      }

      // Warn only when the final candidate still overlaps, not merely
      // because the attempt budget was reached.
      if (isOverlapping({ x, y }, characterBoundingBox)) {
        console.warn(`Could not find non-overlapping position for bubble ${i} after ${maxAttempts} attempts.`);
      }
    }

    locations.push({ x, y });
  }

  return locations;
}
154
 
155
/**
 * Tells whether a point lies inside a bounding box.
 *
 * All four edges are inclusive. Only the point itself is tested; no extent
 * around the point is taken into account.
 *
 * @param point - The point to test.
 * @param box - The box to test against.
 * @returns `true` when the point is inside or on the border of the box.
 */
function isOverlapping(point: { x: number, y: number }, box: BoundingBox): boolean {
  const insideHorizontally = point.x >= box.left && point.x <= box.left + box.width;
  const insideVertically = point.y >= box.top && point.y <= box.top + box.height;
  return insideHorizontally && insideVertically;
}
159
 
160
  function drawSegmentationMask(ctx: CanvasRenderingContext2D, mask: Uint8Array, width: number, height: number) {
161
  const imageData = ctx.getImageData(0, 0, width, height);
 
209
  shape: "oval" | "rectangular" | "cloud" | "thought",
210
  line: "handdrawn" | "straight" | "bubble" | "chaotic",
211
  font: string,
212
+ characterBoundingBox: BoundingBox | null,
213
  imageWidth: number,
214
+ imageHeight: number
 
215
  ) {
216
  const bubbleWidth = Math.min(300, imageWidth * 0.4);
217
  const bubbleHeight = Math.min(150, imageHeight * 0.3);
218
  const padding = 20;
219
 
220
+ const fontSize = 20;
221
  ctx.font = `${fontSize}px ${font}`;
222
 
223
+ const wrappedText = wrapText(ctx, text, bubbleWidth - padding * 2, fontSize);
224
+ const textDimensions = measureTextDimensions(ctx, wrappedText, fontSize);
225
 
226
  const finalWidth = Math.max(bubbleWidth, textDimensions.width + padding * 2);
227
  const finalHeight = Math.max(bubbleHeight, textDimensions.height + padding * 2);
228
 
229
  const bubbleLocation = {
230
+ x: Math.max(finalWidth / 2 + padding, Math.min(imageWidth - finalWidth / 2 - padding, location.x)),
231
+ y: Math.max(finalHeight / 2 + padding, Math.min(imageHeight - finalHeight / 2 - padding, location.y))
232
  };
233
 
234
  ctx.fillStyle = 'white';
235
  ctx.strokeStyle = 'black';
236
  ctx.lineWidth = 2;
237
 
238
+ let tailTarget = null;
239
+ if (characterBoundingBox) {
240
+ tailTarget = {
241
+ x: characterBoundingBox.left + characterBoundingBox.width / 2,
242
+ y: characterBoundingBox.top + characterBoundingBox.height * 0.2
243
+ };
244
+ }
245
+
246
  ctx.beginPath();
247
+ drawBubbleShape(ctx, shape, bubbleLocation, finalWidth, finalHeight, tailTarget);
248
  ctx.fill();
249
  ctx.stroke();
250
 
251
+ if (tailTarget) {
252
+ drawTail(ctx, bubbleLocation, finalWidth, finalHeight, tailTarget, shape);
253
+ }
 
 
 
 
254
 
255
  ctx.fillStyle = 'black';
256
  ctx.textAlign = 'center';
 
264
  bubbleLocation: { x: number, y: number },
265
  width: number,
266
  height: number,
267
+ tailTarget: { x: number, y: number } | null
268
  ) {
269
  switch (shape) {
270
  case "oval":
 
348
/**
 * Draws the bubble's tail: a curved wedge that starts on the bubble's bottom
 * edge, reaches the target point and comes back 20px to the right of where
 * it started. The wedge is filled and stroked with the context's current
 * styles.
 *
 * @param ctx - Canvas context to draw on.
 * @param bubbleLocation - Center of the bubble.
 * @param bubbleWidth - Bubble width; currently unused, kept for the call signature.
 * @param bubbleHeight - Bubble height, used to find the bottom edge.
 * @param tailTarget - Point the tail tip reaches.
 * @param shape - Bubble shape; currently unused, every shape gets the same tail.
 */
function drawTail(
  ctx: CanvasRenderingContext2D,
  bubbleLocation: { x: number, y: number },
  bubbleWidth: number,
  bubbleHeight: number,
  tailTarget: { x: number, y: number },
  shape: string
) {
  const tailWidth = 20;

  // The tail is anchored on the bubble's bottom edge, below its center.
  const baseY = bubbleLocation.y + bubbleHeight / 2;
  const deltaX = tailTarget.x - bubbleLocation.x;

  // Control points at one third and two thirds of the horizontal distance:
  // the first stays level with the bubble, the second level with the target.
  const controlPoint1 = {
    x: bubbleLocation.x + deltaX / 3,
    y: baseY
  };
  const controlPoint2 = {
    x: bubbleLocation.x + deltaX * 2 / 3,
    y: tailTarget.y
  };

  ctx.beginPath();
  ctx.moveTo(bubbleLocation.x, baseY);

  // Outbound edge: bubble -> tip.
  ctx.bezierCurveTo(
    controlPoint1.x, controlPoint1.y,
    controlPoint2.x, controlPoint2.y,
    tailTarget.x, tailTarget.y
  );

  // Return edge: tip -> bubble, shifted right by the tail width.
  ctx.bezierCurveTo(
    controlPoint2.x + tailWidth, controlPoint2.y,
    controlPoint1.x + tailWidth, controlPoint1.y,
    bubbleLocation.x + tailWidth, baseY
  );

  ctx.closePath();
  ctx.fill();
  ctx.stroke();
}
388
 
389
+ function wrapText(ctx: CanvasRenderingContext2D, text: string, maxWidth: number, lineHeight: number): string[] {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  const words = text.split(' ');
391
  const lines: string[] = [];
392
  let currentLine = '';
 
395
  const testLine = currentLine + (currentLine ? ' ' : '') + word;
396
  const metrics = ctx.measureText(testLine);
397
 
398
+ if (metrics.width > maxWidth) {
399
  lines.push(currentLine);
400
  currentLine = word;
401
  } else {
 
410
  return lines;
411
  }
412
 
413
+ function measureTextDimensions(ctx: CanvasRenderingContext2D, lines: string[], lineHeight: number): { width: number, height: number } {
 
414
  let maxWidth = 0;
 
415
  const height = lineHeight * lines.length;
416
 
417
  for (const line of lines) {
 
422
  return { width: maxWidth, height };
423
  }
424
 
425
+ function drawFormattedText(ctx: CanvasRenderingContext2D, lines: string[], x: number, y: number, maxWidth: number, lineHeight: number) {
 
426
  const totalHeight = lineHeight * lines.length;
427
  let startY = y - totalHeight / 2 + lineHeight / 2;
428
 
429
  for (let i = 0; i < lines.length; i++) {
430
  const line = lines[i];
431
  const lineY = startY + i * lineHeight;
432
+ ctx.fillText(line, x, lineY, maxWidth);
 
433
  }
434
  }