Upload 22 files

Browse files

Files changed (23) hide show

.gitattributes +9 -0
README.md +271 -0
config.yaml +82 -0
samples/1739028394882__000003200_9.jpg +3 -0
samples/1739028394882__000003200_9.jpgZone.Identifier +0 -0
samples/1739030252616__000003400_0.jpg +3 -0
samples/1739030252616__000003400_0.jpgZone.Identifier +0 -0
samples/1739030367779__000003400_1.jpg +3 -0
samples/1739030367779__000003400_1.jpgZone.Identifier +0 -0
samples/1739030480214__000003400_2.jpg +3 -0
samples/1739030480214__000003400_2.jpgZone.Identifier +0 -0
samples/1739030595357__000003400_3.jpg +3 -0
samples/1739030595357__000003400_3.jpgZone.Identifier +0 -0
samples/1739030707660__000003400_4.jpg +0 -0
samples/1739030707660__000003400_4.jpgZone.Identifier +0 -0
samples/1739030822778__000003400_5.jpg +3 -0
samples/1739030822778__000003400_5.jpgZone.Identifier +0 -0
samples/1739030937356__000003400_6.jpg +3 -0
samples/1739030937356__000003400_6.jpgZone.Identifier +0 -0
samples/1739031049963__000003400_7.jpg +3 -0
samples/1739031049963__000003400_7.jpgZone.Identifier +0 -0
samples/1739031164540__000003400_8.jpg +3 -0
samples/1739031164540__000003400_8.jpgZone.Identifier +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+samples/1739028394882__000003200_9.jpg filter=lfs diff=lfs merge=lfs -text
+samples/1739030252616__000003400_0.jpg filter=lfs diff=lfs merge=lfs -text
+samples/1739030367779__000003400_1.jpg filter=lfs diff=lfs merge=lfs -text
+samples/1739030480214__000003400_2.jpg filter=lfs diff=lfs merge=lfs -text
+samples/1739030595357__000003400_3.jpg filter=lfs diff=lfs merge=lfs -text
+samples/1739030822778__000003400_5.jpg filter=lfs diff=lfs merge=lfs -text
+samples/1739030937356__000003400_6.jpg filter=lfs diff=lfs merge=lfs -text
+samples/1739031049963__000003400_7.jpg filter=lfs diff=lfs merge=lfs -text
+samples/1739031164540__000003400_8.jpg filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,271 @@

+---
+tags:
+- text-to-image
+- flux
+- lora
+- diffusers
+- template:sd-lora
+widget:
+- text: 'A photorealistic business headshot of [token], a caucasian man in his 40s
+        exuding confidence in a modern office setting.'
+  output:
+    url: samples/1739030252616__000003400_0.jpg
+- text: 'A professional portrait of [token], a caucasian man in his 40s and full head
+        of hair, dressed sharply in a navy suit, captured with soft natural light.'
+  output:
+    url: samples/1739030367779__000003400_1.jpg
+- text: 'A high-quality headshot of [token], a caucasian man in his 40s and full head
+        of hair, taken with an 85mm lens, emphasizing realism and authority.'
+  output:
+    url: samples/1739030480214__000003400_2.jpg
+- text: 'A corporate headshot of [token] , a caucasian man in his 40s, standing before
+        a sleek office backdrop with a confident expression.'
+  output:
+    url: samples/1739030595357__000003400_3.jpg
+- text: 'An executive portrait of [token], a caucasian man in his 40s and full head
+        of hair, softly lit, showcasing subtle facial details and approachability.'
+  output:
+    url: samples/1739030707660__000003400_4.jpg
+- text: 'A detailed business headshot of [token], a caucasian man in his 40s and full
+        head of hair, framed with a blurred modern office environment.'
+  output:
+    url: samples/1739030822778__000003400_5.jpg
+- text: 'A professional close-up of [token], a caucasian man in his 40s and full head
+        of hair, using a shallow depth of field to highlight facial authenticity.'
+  output:
+    url: samples/1739030937356__000003400_6.jpg
+- text: 'A crisp and polished portrait of [token], a caucasian man in his 40s and full
+        head of hair, captured in a well-lit professional setting.'
+  output:
+    url: samples/1739031049963__000003400_7.jpg
+- text: 'A corporate-style image of [token], a caucasian man in his 40s and full head
+        of hair, with a slight smile, reinforcing trust and professionalism.'
+  output:
+    url: samples/1739031164540__000003400_8.jpg
+- text: 'A photorealistic shot of [token], a caucasian man in his 40s and full head
+        of hair, taken with a Nikon D850, emphasizing clarity and natural texture.'
+  output:
+    url: samples/1739028394882__000003200_9.jpg
+base_model: black-forest-labs/FLUX.1-dev
+instance_prompt: steve
+license: other
+license_name: flux-1-dev-non-commercial-license
+license_link: https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md
+---
+# steve_lora_flux_1_dev_v1.2
+A LoRA adapter for generating images of a man named “Steve” in a wide variety of scenarios. It is fine-tuned from **[black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)** using a Flow Matching–based noise scheduler.
+<Gallery />
+## Trigger Words
+Use `steve` in your prompt to activate the specific style and character details for this LoRA.
+## Model Information
+- **LoRA Rank / Alpha**: 16 / 16
+- **Number of Steps**: 4000
+- **Batch Size**: 1
+- **Learning Rate**: 0.0001
+- **Noise Scheduler**: `flowmatch`
+- **Optimizer**: `adamw8bit`
+- **Precision**: `bf16`
+- **Gradient Checkpointing**: true
+- **EMA**: true (decay = 0.99)
+- **Quantization**: enabled
+## How to Use
+This LoRA can be merged or applied to the **FLUX.1-dev** base model through [Diffusers](https://github.com/huggingface/diffusers) or a compatible UI/tool.
+Example pseudocode:
+```python
+from diffusers import StableDiffusionPipeline
+import torch
+base_model = "black-forest-labs/FLUX.1-dev"
+lora_model = "YOUR_USERNAME/steve_lora_flux_1_dev_v1.2"
+pipe = StableDiffusionPipeline.from_pretrained(base_model, torch_dtype=torch.float16).to("cuda")
+# Load your LoRA weights (implementation depends on the UI or method)
+# pipe.load_lora_weights(lora_model)  # Example call
+prompt = "steve, man lounging in fitted athletic wear on crisp white linens, strong and confident"
+image = pipe(prompt).images[0]
+image.save("steve_example.jpg")
+```
+## Download Model
+Weights for this LoRA are available in Safetensors format.
+Download them from the Files & versions tab.
+## License
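+This model is provided under the FLUX.1 [dev] Non-Commercial License (`flux-1-dev-non-commercial-license`). Please review the [license text](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md) for details on acceptable use.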
+## Acknowledgements
+Trained with AI Toolkit by Ostris
+Based on the FLUX.1-dev base model
+## Disclaimer
+Use responsibly. This model is intended for artistic, non-commercial purposes. The creators are not responsible for any misuse, generation of disallowed content, or potential harm caused by outputs. Always review and curate model outputs before sharing.
+# steveant/steve-lora-v1.2
+This is a [LoRA](https://arxiv.org/abs/2106.09685) adapter fine-tuned on a custom image dataset to generate images featuring a man named “Steve” in various settings and scenarios. It was trained on top of the [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev) base model, using a Flow Matching–based noise scheduler and LoRA network adapters.
+> **Note:** This model is in version `v1.2` and is currently considered experimental.
+---
+## Model Details
+- **Model type**: LoRA adapter for FLUX.1-dev (trained with the `sd_trainer` process)
+- **Trigger word**: `steve`
+- **Base model**: [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)
+- **Network**: LoRA (rank: 16, alpha: 16)
+- **Quantization**: Enabled
+- **Datasets**: Private dataset containing images and associated textual captions.
+### Model Architecture and Training
+This LoRA was trained using the following key parameters:
+- **Training steps**: `4000`
+- **Batch size**: `1`
+- **Gradient accumulation steps**: `1`
+- **Learning rate**: `0.0001`
+- **Noise Scheduler**: `flowmatch`
+- **Optimizer**: `adamw8bit`
+- **Precision**: `bf16`
+- **LoRA settings**:
+  - Linear rank: `16`
+  - Linear alpha: `16`
+- **Sampling configuration** (for sample images):
+  - Sampler: `flowmatch`
+  - Resolution: `1200 x 1600`
+  - Guidance scale: `4.1`
+  - Sample steps: `29`
+During training, image captions were drawn from `.txt` files. Some techniques applied include:
+- **Caption dropout**: `0.00`
+- **Token shuffling**: `true`
+- **Gradient checkpointing**: `true`
+- **Exponential moving average**: `use_ema = true` with `ema_decay = 0.99`
+---
+## Intended Use
+This model is intended to generate images of a “Steve” character in various poses, outfits, and scenarios. Possible use cases include:
+- Creative media and content generation
+- Character concepting for artistic projects
+- Testing and experimenting with Flow Matching–based schedulers on FLUX.1-dev
+> **Important**: This model is not intended to generate explicit or harmful content. Users are advised to comply with local regulations and handle outputs responsibly.
+---
+## How to Use
+1. **Installation**
+   Make sure you have the [Diffusers library](https://github.com/huggingface/diffusers) or another FLUX-compatible framework installed.
+2. **Loading the Model**
+   ```python
+   from diffusers import StableDiffusionPipeline
+   import torch
+   # Example: Pseudocode for loading the base model + LoRA
+   base_model_id = "black-forest-labs/FLUX.1-dev"
+   lora_model_id = "steveant/steve-lora-v1.2"  # hypothetical path on HF hub
+   pipeline = StableDiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16).to("cuda")
+   # Load LoRA weights
+   # Typically, you would merge or apply the LoRA as per your chosen library's method.
+   ```
+3. **Prompting**
+   Use the **trigger word** `steve` in your prompt to invoke the specific style or character details. For instance:
+   ```python
+   prompt = (
+       "steve, man lounging in fitted athletic wear on crisp white linens, "
+       "strong and confident expression, warm ambient lighting, full-body shot, "
+       "textured fabric details"
+   )
+   result = pipeline(prompt).images[0]
+   result.save("steve_lounging.png")
+   ```
+4. **Negative Prompting (Optional)**
+   Pass a `negative_prompt` argument to omit or reduce undesired elements.
+   ```python
+   neg_prompt = "low resolution, bad quality"
+   result = pipeline(prompt=prompt, negative_prompt=neg_prompt).images[0]
+   ```
+---
+## Sample Prompts
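+Below are some of the prompts used to generate sample images during training (the `[token]` placeholder is shown replaced by the trigger word `steve`):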
+- `A photorealistic business headshot of steve, a Caucasian man in his 40s exuding confidence in a modern office setting.`
+- `A professional portrait of steve, a Caucasian man in his 40s and full head of hair, dressed sharply in a navy suit, captured with soft natural light.`
+- `A high-quality headshot of steve, a Caucasian man in his 40s and full head of hair, taken with an 85mm lens, emphasizing realism and authority.`
+- `A corporate headshot of steve, a Caucasian man in his 40s, standing before a sleek office backdrop with a confident expression.`
+---
+## Limitations and Biases
+- The model’s outputs depend on the style and content of the dataset.
+- Since the training data is limited to “Steve” images in specific scenarios, the model may not generalize well to drastically different contexts.
+- **Bias**: Any biases in the original dataset might be reflected in the generated images.
+---
+## Training Data
+- **Private dataset** of images featuring “Steve,” labeled with text captions.
+- **Resolution** used for training (latents cached to disk): `1200`.
+- **Data augmentation**: Token shuffling only; caption dropout was disabled (rate `0.0`).
+---
+## Citation
+If you use this model or find it helpful for your research/projects, please cite:
+```
+@misc{steve_lora_flux_1_dev_v1.2,
+  author = {steveant},
+  title = {steve_lora_flux_1_dev_v1.2 (LoRA model)},
+  year = {2025},
+  howpublished = {\url{https://huggingface.co/steveant/steve-lora-v1.2}},
+}
+```
+---
+## License
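+This model is released under the **FLUX.1 [dev] Non-Commercial License** (`flux-1-dev-non-commercial-license`), as declared in the model card metadata. Please refer to the [license text](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md) or contact the author for more details.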
+---
+## Contributing
+Contributions are welcome! If you wish to improve this model card or have new use cases and improvements to propose:
+1. Open an issue or discussion on the model repository (if available).
+2. Submit pull requests or suggestions.
+3. Respect the usage and license guidelines.
+---
+**Disclaimer**:
+This model is for research and educational purposes. Always validate and review images generated to ensure they align with your intended use and do not violate any regulations or ethical standards.

config.yaml ADDED Viewed

	@@ -0,0 +1,82 @@

+job: extension  # job definition for the steve LoRA training run described under `config`
+config:
+  name: steve_lora_flux_1_dev_v1.2  # run name; meta.name at the bottom repeats it
+  process:
+  - type: sd_trainer
+    training_folder: output
+    performance_log_every: 1000
+    device: cuda:0
+    trigger_word: steve  # NOTE(review): presumably substituted for [token] in the sample prompts - confirm
+    network:
+      type: lora
+      linear: 16  # LoRA linear rank; set equal to linear_alpha
+      linear_alpha: 16
+    save:
+      dtype: float16  # dtype for saved weights; differs from train.dtype (bf16)
+      save_every: 200  # same step interval as sample.sample_every
+      max_step_saves_to_keep: 4
+      push_to_hub: false
+    datasets:
+    - folder_path: ../images/steve3-lora  # relative path; NOTE(review): base directory is not visible here
+      caption_ext: txt
+      caption_dropout_rate: 0.0  # rate of 0.0, i.e. no caption dropout
+      shuffle_tokens: true
+      cache_latents_to_disk: true
+      resolution:
+      - 1200  # single resolution entry
+    train:
+      batch_size: 1
+      steps: 4000
+      gradient_accumulation_steps: 1
+      train_unet: true
+      train_text_encoder: false  # text encoder is not trained; train_unet above is true
+      content_or_style: balanced
+      gradient_checkpointing: true
+      noise_scheduler: flowmatch
+      optimizer: adamw8bit
+      lr: 0.0001
+      skip_first_sample: false
+      linear_timesteps: true
+      ema_config:
+        use_ema: true
+        ema_decay: 0.99
+      dtype: bf16
+    model:
+      name_or_path: black-forest-labs/FLUX.1-dev
+      is_flux: true
+      quantize: true
+    sample:
+      sampler: flowmatch  # same value as train.noise_scheduler
+      sample_every: 200
+      width: 1200  # with height below: 1200 x 1600 portrait samples
+      height: 1600
+      prompts:  # ten prompts, each containing the [token] placeholder
+      - A photorealistic business headshot of [token], a caucasian man in his 40s
+        exuding confidence in a modern office setting.
+      - A professional portrait of [token], a caucasian man in his 40s and full head
+        of hair, dressed sharply in a navy suit, captured with soft natural light.
+      - A high-quality headshot of [token], a caucasian man in his 40s and full head
+        of hair, taken with an 85mm lens, emphasizing realism and authority.
+      - A corporate headshot of [token] , a caucasian man in his 40s, standing before
+        a sleek office backdrop with a confident expression.
+      - An executive portrait of [token], a caucasian man in his 40s and full head
+        of hair, softly lit, showcasing subtle facial details and approachability.
+      - A detailed business headshot of [token], a caucasian man in his 40s and full
+        head of hair, framed with a blurred modern office environment.
+      - A professional close-up of [token], a caucasian man in his 40s and full head
+        of hair, using a shallow depth of field to highlight facial authenticity.
+      - A crisp and polished portrait of [token], a caucasian man in his 40s and full
+        head of hair, captured in a well-lit professional setting.
+      - A corporate-style image of [token], a caucasian man in his 40s and full head
+        of hair, with a slight smile, reinforcing trust and professionalism.
+      - A photorealistic shot of [token], a caucasian man in his 40s and full head
+        of hair, taken with a Nikon D850, emphasizing clarity and natural texture.
+      neg: ''  # empty negative prompt
+      seed: 42
+      walk_seed: true
+      lora_scale: 1.3
+      guidance_scale: 4.1
+      sample_steps: 29
+meta:
+  name: steve_lora_flux_1_dev_v1.2
+  version: '1.2'  # quoted so YAML keeps it a string instead of parsing a float