farjadmalik commited on
Commit
c6f85d1
·
1 Parent(s): a20609a

MVP application

Browse files
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python virtual environment
2
+ venv/
3
+ .venv/
4
+ env/
5
+ .env/
6
+
7
+ # Python cache
8
+ __pycache__/
9
+ *.pyc
10
+
11
+ # Output files
12
+ outputs/
13
+
14
+ # Incomplete files
15
+ src/audio_synthesizer.py
README.md CHANGED
@@ -1,14 +1,64 @@
1
- ---
2
- title: FromWordsToMedia
3
- emoji: 🖼
4
- colorFrom: purple
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.25.2
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: Generates an image and a caption for social media posts
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # From Words to Reels
2
+
3
+ This project generates social media posts, including an image and a caption, from a user-provided text prompt. It leverages deep learning models for both text-to-image synthesis and text generation to create engaging content.
4
+
5
+ ## How it Works
6
+
7
+ The process is orchestrated by the `main.py` script and follows these steps:
8
+
9
+ 1. **User Input**: The script prompts the user to enter a text prompt.
10
+ 2. **Image Generation**: The `VisualSynthesizer` takes the prompt, enhances it, and uses a text-to-image diffusion model (e.g., Stable Diffusion) to generate a corresponding image.
11
+ 3. **Caption Generation**: The `TextSynthesizer` uses the original prompt to generate a suitable caption for the post using a causal language model.
12
+ 4. **Output**: Both the generated image (`.png`) and the caption (`.txt`) are saved to the `outputs/` directory, prefixed with a timestamp.
13
+
14
+ ## Project Structure
15
+
16
+ ```
17
+ .
18
+ ├── main.py # Main script to run the application
19
+ ├── README.md # This file
20
+ ├── outputs/ # Directory for generated images and captions
21
+ ├── src/
22
+ │ ├── visual_synthesizer.py # Handles image generation
23
+ │ ├── text_synthesizer.py # Handles text/caption generation
24
+ └── utils/
25
+ ├── config.py # Configuration for models and paths
26
+ └── helpers.py # Helper functions for saving files etc.
27
+ ```
28
+
29
+ ## Setup and Installation
30
+
31
+ 1. **Create a virtual environment:**
32
+ ```bash
33
+ python -m venv venv
34
+ venv\Scripts\activate   # Windows — on macOS/Linux use: source venv/bin/activate
35
+ ```
36
+
37
+ 2. **Install dependencies:**
38
+ Create a `requirements.txt` file with the following content:
39
+ ```
40
+ torch
41
+ diffusers
42
+ transformers
43
+ sentence-transformers
44
+ Pillow
45
+ accelerate
46
+ ```
47
+ Then run:
48
+ ```bash
49
+ pip install -r requirements.txt
50
+ ```
51
+
52
+ ## Usage
53
+
54
+ To generate a post, run the `main.py` script:
55
+
56
+ ```bash
57
+ python main.py
58
+ ```
59
+
60
+ You will be prompted to enter your text. After processing, the generated image and caption will be saved in the `outputs` directory.
61
+
62
+ ## Configuration
63
+
64
+ You can customize the models and other parameters by editing the `utils/config.py` file. This allows you to easily swap out different text-to-image or language models.
app.py CHANGED
@@ -1,154 +1,73 @@
 
 
1
  import gradio as gr
2
- import numpy as np
3
- import random
4
 
5
- # import spaces #[uncomment to use ZeroGPU]
6
- from diffusers import DiffusionPipeline
7
- import torch
8
-
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
11
-
12
- if torch.cuda.is_available():
13
- torch_dtype = torch.float16
14
- else:
15
- torch_dtype = torch.float32
16
-
17
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
- pipe = pipe.to(device)
19
-
20
- MAX_SEED = np.iinfo(np.int32).max
21
- MAX_IMAGE_SIZE = 1024
22
-
23
-
24
- # @spaces.GPU #[uncomment to use ZeroGPU]
25
- def infer(
26
- prompt,
27
- negative_prompt,
28
- seed,
29
- randomize_seed,
30
- width,
31
- height,
32
- guidance_scale,
33
- num_inference_steps,
34
- progress=gr.Progress(track_tqdm=True),
35
- ):
36
- if randomize_seed:
37
- seed = random.randint(0, MAX_SEED)
38
-
39
- generator = torch.Generator().manual_seed(seed)
40
-
41
- image = pipe(
42
- prompt=prompt,
43
- negative_prompt=negative_prompt,
44
- guidance_scale=guidance_scale,
45
- num_inference_steps=num_inference_steps,
46
- width=width,
47
- height=height,
48
- generator=generator,
49
- ).images[0]
50
-
51
- return image, seed
52
-
53
-
54
- examples = [
55
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
- "An astronaut riding a green horse",
57
- "A delicious ceviche cheesecake slice",
58
- ]
59
-
60
- css = """
61
- #col-container {
62
- margin: 0 auto;
63
- max-width: 640px;
64
- }
65
- """
66
-
67
- with gr.Blocks(css=css) as demo:
68
- with gr.Column(elem_id="col-container"):
69
- gr.Markdown(" # Text-to-Image Gradio Template")
70
-
71
- with gr.Row():
72
- prompt = gr.Text(
73
- label="Prompt",
74
- show_label=False,
75
- max_lines=1,
76
- placeholder="Enter your prompt",
77
- container=False,
78
- )
79
-
80
- run_button = gr.Button("Run", scale=0, variant="primary")
81
-
82
- result = gr.Image(label="Result", show_label=False)
83
-
84
- with gr.Accordion("Advanced Settings", open=False):
85
- negative_prompt = gr.Text(
86
- label="Negative prompt",
87
- max_lines=1,
88
- placeholder="Enter a negative prompt",
89
- visible=False,
90
- )
91
-
92
- seed = gr.Slider(
93
- label="Seed",
94
- minimum=0,
95
- maximum=MAX_SEED,
96
- step=1,
97
- value=0,
98
- )
99
-
100
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
-
102
- with gr.Row():
103
- width = gr.Slider(
104
- label="Width",
105
- minimum=256,
106
- maximum=MAX_IMAGE_SIZE,
107
- step=32,
108
- value=1024, # Replace with defaults that work for your model
109
- )
110
-
111
- height = gr.Slider(
112
- label="Height",
113
- minimum=256,
114
- maximum=MAX_IMAGE_SIZE,
115
- step=32,
116
- value=1024, # Replace with defaults that work for your model
117
- )
118
-
119
- with gr.Row():
120
- guidance_scale = gr.Slider(
121
- label="Guidance scale",
122
- minimum=0.0,
123
- maximum=10.0,
124
- step=0.1,
125
- value=0.0, # Replace with defaults that work for your model
126
- )
127
-
128
- num_inference_steps = gr.Slider(
129
- label="Number of inference steps",
130
- minimum=1,
131
- maximum=50,
132
- step=1,
133
- value=2, # Replace with defaults that work for your model
134
- )
135
-
136
- gr.Examples(examples=examples, inputs=[prompt])
137
- gr.on(
138
- triggers=[run_button.click, prompt.submit],
139
- fn=infer,
140
- inputs=[
141
- prompt,
142
- negative_prompt,
143
- seed,
144
- randomize_seed,
145
- width,
146
- height,
147
- guidance_scale,
148
- num_inference_steps,
149
  ],
150
- outputs=[result, seed],
 
 
151
  )
152
-
153
- if __name__ == "__main__":
154
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # External library imports
2
+ from datetime import datetime
3
  import gradio as gr
 
 
4
 
5
+ # Internal imports
6
+ from src.visual_synthesizer import VisualSynthesizer
7
+ from src.text_synthesizer import TextSynthesizer
8
+ # from src.audio_synthesizer import AudioSynthesizer
9
+ from utils.config import *
10
+ # from utils.logger import setup_logger
11
+ from utils.helpers import richify_prompt, save_caption, save_image
12
+
13
+
14
def compose(prompt: str, filename: str = "generated_post"):
    """
    Compose a social media post (image + caption) from a text prompt.

    Args:
        prompt (str): The text prompt used to generate the image and caption.
        filename (str): Base name for the output files; a timestamp prefix is
            added so repeated runs never overwrite earlier outputs.

    Returns:
        tuple: (path to the saved image, the generated caption text).
    """
    # Prefix a timestamp for unique, chronologically sortable filenames.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    # BUG FIX: previously the caller-supplied base name was discarded and a
    # literal "(unknown)" placeholder was embedded instead.
    filename = f"{timestamp}_{filename}"

    # Generate the image from an enriched version of the prompt.
    image_gen = VisualSynthesizer()
    image = image_gen.generate_image(prompt=richify_prompt(prompt))
    # Persist the image and report where it landed.
    image_path = save_image(image, filename=filename)
    print(f"Image saved at: {image_path}")

    # Generate a caption from the raw (un-enriched) prompt and persist it.
    text_gen = TextSynthesizer()
    caption = text_gen.generate_caption(prompt=prompt)
    caption_path = save_caption(caption, filename=filename)
    print(f"Caption saved at: {caption_path}")
    return image_path, caption
40
+
41
+
42
if __name__ == '__main__':
    # Build the Gradio UI: one prompt box in, generated image + caption out.
    prompt_box = gr.Textbox(lines=5, label="Prompt", placeholder="Enter your prompt here...")
    result_components = [
        gr.Image(type="filepath", label="Generated Image"),
        gr.Textbox(label="Generated Caption"),
    ]

    iface = gr.Interface(
        fn=compose,
        inputs=prompt_box,
        outputs=result_components,
        title="From Words to Reels",
        description="Enter a prompt to generate an image and a corresponding social media caption.",
        allow_flagging="never",
    )

    # Launch the Gradio app (blocks until the server is stopped).
    iface.launch()
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
- accelerate
2
- diffusers
3
- invisible_watermark
4
  torch
5
  transformers
6
- xformers
 
 
 
 
 
1
+ gradio
 
 
2
  torch
3
  transformers
4
+ diffusers
5
+ sentence-transformers
6
+ pillow
7
+ moviepy
8
+ # NOTE: 'asyncio' removed — it is part of the Python standard library; the
+ # PyPI package of the same name is an obsolete backport and breaks on Python 3.
src/__init__.py ADDED
File without changes
src/text_synthesizer.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
+ from sentence_transformers import SentenceTransformer
4
+
5
+ # Importing the necessary configuration for model names
6
+ from utils.config import TEXT_MODEL_NAME, EMBEDDING_MODEL_NAME
7
+
8
+
9
class TextSynthesizer:
    """Caption generator backed by a causal LM, plus small text utilities
    (embeddings, cleaning, length validation, naive keyword extraction)."""

    def __init__(self, embed_model=EMBEDDING_MODEL_NAME, text_model=TEXT_MODEL_NAME):  # alternatives tried: TinyLlama/TinyLlama-1.1B-Chat-v1.0, microsoft/phi-2, HuggingFaceH4/zephyr-1.1B-alpha, "gpt-3.5-turbo"
        """
        Initializes the TextSynthesizer with an embedding model and a causal language model.

        Args:
            embed_model (str): sentence-transformers model id used by get_embedding().
            text_model (str): Hugging Face causal-LM id used by generate_caption().
        """
        self.model = SentenceTransformer(embed_model)
        self.tokenizer = AutoTokenizer.from_pretrained(text_model)
        # NOTE(review): the pipeline below is constructed from the model *name*,
        # not from this object, so the LM weights may be loaded twice — confirm
        # and consider passing `self.text_model` to `pipeline` instead.
        self.text_model = AutoModelForCausalLM.from_pretrained(
            text_model,
            device_map="auto",
            torch_dtype="auto"
        )

        self.text_generator = pipeline(
            'text-generation',
            model=text_model,
            tokenizer=self.tokenizer
        )

    def get_embedding(self, text: str):
        """
        Generates an embedding for the input text.

        Args:
            text (str): The input text (e.g., a quote, poem, or verse).

        Returns:
            numpy.ndarray: A vector embedding of the text.
        """
        return self.model.encode(text)

    def clean_text(self, text: str) -> str:
        """
        Clean and normalize input text by removing unwanted characters and trimming extra whitespace.
        """
        # Keep word characters, whitespace, and common punctuation/symbols only.
        cleaned = re.sub(r"[^\w\s.,:;!?'\"-]", "", text)
        # Collapse any run of whitespace to a single space and trim the ends.
        cleaned = re.sub(r"\s+", " ", cleaned).strip()
        return cleaned


    def validate_text_length(self, text: str, max_length: int = 300) -> bool:
        """
        Validate text length to avoid overly long inputs.

        Returns:
            bool: True when 0 < len(text) <= max_length, False otherwise.
        """
        return 0 < len(text) <= max_length

    def extract_keywords(self, text: str) -> list:
        """
        Extracts keywords from the given text.
        This is a placeholder and can be enhanced with more sophisticated NLP techniques
        or another LLM for semantic keyword extraction.

        Args:
            text (str): The input text.

        Returns:
            list: A list of extracted keywords (lowercased words longer than 2 chars).
        """
        # Simple example: lowercase and split on whitespace; for production,
        # consider NLTK, SpaCy, or an LLM-based keyword extractor.
        return [word for word in text.lower().split() if len(word) > 2]  # Basic filtering

    def generate_caption(self, prompt: str, max_new_tokens: int = 300) -> str:
        """
        Generates a caption based on the provided prompt, enriched with poetic or authoritative quotes.

        :param prompt: The input text prompt for text generation.
        :param max_new_tokens: Upper bound on the number of newly generated tokens.
        :return: The generated caption with the instruction prompt stripped out.
        """
        # Craft a more detailed instruction so the model produces a fitting caption.
        generation_prompt = (
            f"Write a detailed, poetic, and informative paragraph about the following topic: \n'{prompt}'.\n"
            f"Use vivid, emotional language and include relevant verses or quotes by poets, philosophers, or scientists."
            f"The paragraph should be knowledgable, well researched and engaging. The tone should be educational and inspirational, not casual or conversational."
            f"Dont use emojis or hastags or words like response or answer, just write the paragraph directly.\n"
        )

        generated_outputs = self.text_generator(
            generation_prompt,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.2,  # helps reduce repetitive loops in the output
            num_return_sequences=1,
            eos_token_id=self.tokenizer.eos_token_id,
            pad_token_id=self.tokenizer.eos_token_id
        )

        raw_output = generated_outputs[0]['generated_text']

        # The pipeline returns prompt + completion; strip the prompt prefix.
        if generation_prompt in raw_output:
            # Keep only the text generated after the instruction.
            caption = raw_output.split(generation_prompt)[-1].strip()
        else:
            caption = raw_output.strip()

        # Defensive second pass in case the prompt still appears mid-text.
        caption = caption.replace(generation_prompt, "").strip()

        # Further clean the text to ensure it's a single, coherent block
        # caption = self.clean_text(caption.split('\n')[0])

        return caption
src/visual_synthesizer.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from diffusers.pipelines.auto_pipeline import AutoPipelineForText2Image
3
+ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
4
+ from diffusers.utils.export_utils import export_to_video
5
+ from typing import Optional
6
+ # Importing the model name from a configuration file
7
+ # This allows for easy changes to the model without modifying the code
8
+ # Ensure that the model_name is defined in utils/config.py
9
+ from utils.config import IMG_MODEL_NAME, VIDEO_MODEL_NAME, OUTPUT_DIR
10
+
11
+
12
class VisualSynthesizer:
    """Wraps a diffusers text-to-image pipeline; text-to-video is a stub."""

    def __init__(self,
                 img_model: str = IMG_MODEL_NAME,
                 video_model: str = VIDEO_MODEL_NAME):
        """
        Initializes the VisualSynthesizer with a specified text-to-image model.

        Args:
            img_model (str): The Hugging Face model ID for the diffusion model.
            video_model (str): The Hugging Face model ID for the video generation
                model (currently unused — the video pipeline is commented out).
        """
        # Pick GPU when available; fp16 only makes sense on CUDA.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.torch_dtype = torch.float16 if self.device == "cuda" else torch.float32

        torch.backends.cudnn.benchmark = True  # let cuDNN auto-tune kernels for repeated input sizes

        # Text-to-image pipeline; the "fp16" weight variant is requested only on CUDA.
        self.image_pipe = AutoPipelineForText2Image.from_pretrained(
            img_model,
            torch_dtype=self.torch_dtype,
            variant="fp16" if self.torch_dtype == torch.float16 else None,
            low_cpu_mem_usage=True
        ).to(self.device)

        # Text-to-video pipeline (disabled until generate_video is completed).
        # self.video_pipe = DiffusionPipeline.from_pretrained(
        #     video_model,
        #     torch_dtype=self.torch_dtype,
        #     variant="fp16" if self.torch_dtype == torch.float16 else None,
        #     low_cpu_mem_usage=True
        # ).to(self.device)
        # self.video_pipe.enable_model_cpu_offload()


    def generate_image(self, prompt: str,
                       negative_prompt: str = "blurry, distorted, poorly drawn, watermark",
                       num_inference_steps: int = 50, guidance_scale: float = 7.5):
        """
        Generate a single image for the given prompt.

        Args:
            prompt (str): Text prompt describing the desired image.
            negative_prompt (str): Concepts the model should steer away from.
            num_inference_steps (int): Number of diffusion denoising steps.
            guidance_scale (float): Classifier-free guidance strength.

        Returns:
            The first image of the pipeline output (a PIL.Image by default).
        """
        image = self.image_pipe(prompt,
                                negative_prompt=negative_prompt,
                                num_inference_steps=num_inference_steps,
                                guidance_scale=guidance_scale
                                ).images[0]
        return image

    # TODO: Fix the video generation method — use the correct pipeline and parameters.
    # Placeholder implementation: currently returns None despite the `-> str` annotation.
    def generate_video(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        num_frames: int = 24,  # ~1 second at 24 fps
        fps: int = 8,
        output_path: Optional[str] = "output.mp4",
        guidance_scale: float = 12.5,
        num_inference_steps: int = 25
    ) -> str:  # type: ignore
        """
        Generates a short video from a text prompt (NOT implemented yet).

        Args:
            prompt (str): Text prompt to guide generation.
            negative_prompt (str): Optional negative prompts.
            num_frames (int): Number of video frames.
            fps (int): Frame rate for the video.
            output_path (str): Path to save output video.
            guidance_scale (float): Guidance scale for generation.
            num_inference_steps (int): Number of inference steps.

        Returns:
            str: Path to saved video file (currently None — see TODO above).
        """
        # video_output = self.video_pipe(
        #     prompt=prompt,
        #     negative_prompt=negative_prompt,
        #     num_frames=num_frames,
        #     guidance_scale=guidance_scale,
        #     num_inference_steps=num_inference_steps
        # ).frames

        # result = self.video_pipe(prompt, num_frames=num_frames, **kwargs)
        # frames = result.frames[0]
        # video_path = export_to_video(frames, output_video_path=f"{OUTPUT_DIR}_video", fps=fps)
        # return video_path
        pass
utils/__init__.py ADDED
File without changes
utils/config.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration variables to centralize parameters and paths.
3
+ """
4
+ OUTPUT_DIR = "outputs/" # Directory to save generated image and captions
5
+
6
+ # Image generation settings
7
+ IMAGE_SIZE = (512, 512) # Size of the generated images
8
+
9
+ # Model names for easy change and reuse
10
+ EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
11
+ TEXT_MODEL_NAME = "microsoft/phi-2"
12
+ AUDIO_MODEL_NAME = "" # Placeholder for audio model, can be set later
13
+
14
+ # Stable Diffusion model and device to run on
15
+ IMG_MODEL_NAME = "runwayml/stable-diffusion-v1-5"
16
+ VIDEO_MODEL_NAME = "cerspense/zeroscope_v2_XL" # Placeholder for video model
17
+ # Other models to try # Qwen/Qwen-Image # CompVis/stable-diffusion-v1-4
18
+ # "segmind/SSD-1B" # Or "kandinsky-community/kandinsky-3", "warp-ai/wuerstchen"
19
+ # Video generation models # cerspense/zeroscope_v2_576w # Wan‑Video/Wan2.1
20
+ DEVICE = "cuda" # Change to "cpu" if no GPU available
21
+
22
+ # Font path for overlay text
23
+ # FONT_PATH = "./fonts/arial.ttf"
24
+ # FONT_SIZE = 40
utils/helpers.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime
3
+ from utils.config import OUTPUT_DIR
4
+
5
+
6
def save_caption(caption: str, filename: str, output_dir: str = OUTPUT_DIR):
    """
    Write the generated caption to a UTF-8 text file.

    Args:
        caption (str): The generated text; surrounding whitespace is trimmed.
        filename (str): Required base name; a '.txt' extension is appended
            when missing.
        output_dir (str): Destination folder, created if absent. Defaults to
            the configured OUTPUT_DIR.

    Returns:
        str: Full path to the saved file.

    Raises:
        ValueError: If `filename` is empty.
    """
    # Make sure the destination folder exists before writing.
    os.makedirs(output_dir, exist_ok=True)

    if not filename:
        raise ValueError("Filename must be provided")
    # Normalize the extension so callers may pass a bare base name.
    target_name = filename if filename.endswith('.txt') else filename + '.txt'

    target_path = os.path.join(output_dir, target_name)

    # Persist the trimmed caption text.
    with open(target_path, "w", encoding="utf-8") as handle:
        handle.write(caption.strip())

    return target_path
34
+
35
+
36
def save_image(image, filename: str, output_dir: str = OUTPUT_DIR):
    """
    Save a generated image as a PNG file.

    Args:
        image: The generated image; any object exposing `.save(path)`
            (e.g. a PIL.Image) works.
        filename (str): Required base name; a '.png' extension is appended
            when missing.
        output_dir (str): Destination folder, created if absent. Defaults to
            the configured OUTPUT_DIR.

    Returns:
        str: Full path to the saved image.

    Raises:
        ValueError: If `filename` is empty.
    """
    # exist_ok=True avoids the check-then-create race of the previous
    # `if not os.path.exists(...): os.makedirs(...)` pattern.
    os.makedirs(output_dir, exist_ok=True)

    if not filename:
        raise ValueError("Filename must be provided")
    if not filename.endswith('.png'):
        filename += '.png'

    # Construct the full path and write the image.
    image_path = os.path.join(output_dir, filename)
    image.save(image_path)
    return image_path
58
+
59
+
60
+ def richify_prompt(text: str) -> str:
61
+ """
62
+ Beautifies the input text by removing extra spaces and ensuring proper formatting.
63
+
64
+ Args:
65
+ text (str): The input text to be beautified.
66
+
67
+ Returns:
68
+ str: The beautified text.
69
+ """
70
+ if not isinstance(text, str):
71
+ raise ValueError("Input must be a string")
72
+ text_prompt = (
73
+ f"(best quality:1.3), (intricate details:1.2), high-resolution digital painting of {text}, "
74
+ "ArtStation fine art"
75
+ )
76
+ return ' '.join(text_prompt.split()).strip() if text_prompt else ''
77
+
78
+
79
+ # More richify prompts
80
+ # image_prompt = f"A beautiful and artistic representation of the following text: '{text}'; in the style of Studio Ghibli, digital art, 4k, vibrant colors, intricate details, Artstation."
81
+ # Epic Cinematic Illustration
82
+ # image_prompt = f"(best quality:1.4), (masterpiece:1.3), (detailed:1.2), 4k, wide-angle cosmic panorama of the Big Bang and expanding universe transitioning into the creation of life on Earth, poetic illumination, vibrant nebulae and galaxies, in the style of Studio Ghibli and ArtStation concept art, divine origins, dramatic lighting, awe‑inspiring mood"
83
+ # Realistic Documentary Style
84
+ # image_prompt = f"(realistic cosmic time-lapse:1.2), (masterpiece:1.2), ultra-detailed 8k scientific illustration of cosmic evolution from the Big Bang to modern civilization, expanding galaxies, formation of Earth, emergence of life, soft ambient lighting, realistic textures, wide-angle shot, inspired by ArtStation and nature documentaries, contemplative mood"
85
+ # Animated Spiritual Universe
86
+ # image_prompt = f"(best quality:1.3), (intricate details:1.2), high-resolution digital painting of the universe expanding from the Big Bang into Earth’s formation, evolving life and early civilization, soft celestial lighting, pastel and vibrant colors, in the style of Studio Ghibli animation, ArtStation fine art, uplifting and mystical atmosphere, panoramic composition"
utils/logger.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os

# Level and format are overridable via the LOG_LEVEL environment variable.
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"


def get_logger(name: str) -> logging.Logger:
    """Return a named logger, attaching a stream handler on first use.

    hasHandlers() also considers ancestor loggers, so configuration happens
    only when nothing up the hierarchy would already emit this logger's
    records — repeated calls never stack duplicate handlers.
    """
    logger = logging.getLogger(name)
    if not logger.hasHandlers():
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(logging.Formatter(LOG_FORMAT))
        logger.addHandler(stream_handler)
        logger.setLevel(LOG_LEVEL)
    return logger

# Example:
#     logger = get_logger(__name__)
#     logger.info("Logger initialized.")