Skip to content

Commit

Permalink
Merge pull request #10926 from MPMG-DCC-UFMG/issue-5289
Browse files Browse the repository at this point in the history
Issue 5289 - Relacionando Navegador e User-Agent
  • Loading branch information
rennancl committed Jun 21, 2023
2 parents 9e7a6b0 + 5a487f2 commit d22e004
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 10 deletions.
9 changes: 9 additions & 0 deletions main/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class Meta:
'video_recording_enabled',

'browser_type',
'browser_user_agent',
'skip_iter_errors',
'browser_resolution_width',
'browser_resolution_height',
Expand Down Expand Up @@ -432,6 +433,14 @@ class RawCrawlRequestForm(CrawlRequestForm):
widget=forms.RadioSelect
)

browser_user_agent = forms.CharField(
label='User agent',
help_text='Use, de preferência, um user-agent que combine com o Navegador Web escolhido, seja o sugerido automaticamente abaixo ou outro de sua preferência.',
initial='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
required=False,
widget=forms.TextInput()
)

skip_iter_errors = forms.BooleanField(
required=False, label="Pular iterações com erro"
)
Expand Down
1 change: 1 addition & 0 deletions main/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ class CrawlRequest(TimeStamped):
('firefox', 'Mozilla Firefox'),
]
browser_type = models.CharField(max_length=50, choices=BROWSER_TYPE, default='chromium')
browser_user_agent = models.CharField(max_length=500, blank=True, null=True)

# If true, skips failing iterations with a warning, else, stops the crawler
# if an iteration fails
Expand Down
2 changes: 1 addition & 1 deletion main/staticfiles/css/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ table.dataTable thead .sorting_desc_disabled:before {
background-color: #E8F6EF;
}

#dynamic-processing-item-wrap.disabled, #dynamic-processing-skip-errors.disabled, #dynamic-processing-browser-type.disabled, #dynamic-processing-resolution.disabled, #dynamic-processing-debug-mode.disabled {
#dynamic-processing-item-wrap.disabled, #dynamic-processing-skip-errors.disabled, #dynamic-processing-browser-type.disabled, #dynamic-processing-resolution.disabled, #dynamic-processing-debug-mode.disabled, #dynamic-processing-browser-user-agent.disabled {
opacity: .5;
pointer-events: none;
}
Expand Down
16 changes: 16 additions & 0 deletions main/staticfiles/js/create_crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ function detailDynamicProcessing() {
dynamic_processing_skip_errors = document.getElementById("dynamic-processing-skip-errors")
dynamic_processing_resolution = document.getElementById("dynamic-processing-resolution")
dynamic_processing_browser_type = document.getElementById("dynamic-processing-browser-type")
dynamic_processing_browser_user_agent = document.getElementById("dynamic-processing-browser-user-agent")
dynamic_processing_debug_mode = document.getElementById("dynamic-processing-debug-mode")

if(getCheckboxState("id_dynamic_processing")){
Expand All @@ -421,17 +422,32 @@ function detailDynamicProcessing() {
dynamic_processing_skip_errors.classList.remove("disabled")
dynamic_processing_resolution.classList.remove("disabled")
dynamic_processing_browser_type.classList.remove("disabled")
dynamic_processing_browser_user_agent.classList.remove("disabled")
dynamic_processing_debug_mode.classList.remove("disabled")
}else{
dynamic_processing_check.classList.add("disabled")
dynamic_processing_block.classList.add("disabled")
dynamic_processing_skip_errors.classList.add("disabled")
dynamic_processing_resolution.classList.add("disabled")
dynamic_processing_browser_type.classList.add("disabled")
dynamic_processing_browser_user_agent.classList.add("disabled")
dynamic_processing_debug_mode.classList.add("disabled")
}
}

// Keeps the user-agent input in sync with the selected browser type:
// whenever the browser-type radio group changes, the user-agent field is
// filled with a suggested user-agent string for that browser.
$(document).ready(function () {
    // Suggested user-agent per browser type. Built once here instead of on
    // every change event, and declared with `const` so it does not leak
    // into the global scope.
    const userAgents = {
        chromium: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
        firefox: 'Mozilla/5.0 (X11; Linux i686; rv:111.0) Gecko/20100101 Firefox/111.0',
        webkit: 'Mozilla/5.0 (X11; U; Linux x86_64; en-us) AppleWebKit/531.2+ (KHTML, like Gecko) Version/5.0 Safari/531.2+',
    };

    $("#dynamic-processing-browser-type").change(function () {
        // Value of the currently checked radio button inside the
        // browser-type wrapper; undefined when nothing is checked.
        const browserType = $('input[name=browser_type]:checked', '#dynamic-processing-browser-type').val();
        const userAgentInput = document.querySelector("#id_browser_user_agent");

        // Only overwrite the field when the input exists and a suggestion
        // is known for the selected browser; otherwise the field would be
        // set to the literal string "undefined".
        if (userAgentInput && Object.prototype.hasOwnProperty.call(userAgents, browserType)) {
            userAgentInput.value = userAgents[browserType];
        }
    });
});

function detailCaptcha() {
var mainSelect = document.getElementById("id_captcha");
const captcha_type = mainSelect.options[mainSelect.selectedIndex].value;
Expand Down
9 changes: 6 additions & 3 deletions main/templates/main/_create_06_dynamic.html
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,22 @@
</div>
</div>
<br>
<div id="dynamic-processing-browser-type" class="">
<div id="dynamic-processing-browser-type" class="hidden">
{{ form.browser_type | as_crispy_field}}
</div>
<div id="dynamic-processing-browser-user-agent" class="hidden">
{{ form.browser_user_agent | as_crispy_field}}
</div>
<div id="dynamic-processing-skip-errors" class="hidden">
{{ form.skip_iter_errors | as_crispy_field}}
</div>
<div id="dynamic-processing-debug-mode" class="hidden">
Modo Debug
<p class="small">Ferramentas de depuração de coletores que utilizam processamento dinâmico</p>
<div id="dynamic-processing-trace-enabled" class="">
<div id="dynamic-processing-trace-enabled" class="hidden">
{{ form.create_trace_enabled | as_crispy_field}}
</div>
<div id="dynamic-processing-video-recording_enabled" class="">
<div id="dynamic-processing-video-recording_enabled" class="hidden">
{{ form.video_recording_enabled | as_crispy_field}}
</div>
</div>
Expand Down
8 changes: 2 additions & 6 deletions spider_manager/src/crawling/spiders/static_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,16 +396,12 @@ async def dynamic_processing(self, response):
# Set browser context
context_kwargs = {}

# Set the user agent according to what was sent by Scrapy
if 'user-agent' in normalized_headers:
context_kwargs['user_agent'] = \
normalized_headers['user-agent']

context_kwargs['user_agent'] = self.config['browser_user_agent']

if self.config['video_recording_enabled']:
context_kwargs['record_video_dir'] = os.path.join(instance_path, 'debug', 'video')
context_kwargs['record_video_size'] = {"width": self.config["browser_resolution_width"], "height": self.config["browser_resolution_height"]}

context = await browser.new_context(**context_kwargs)

if self.config['create_trace_enabled']:
Expand Down

0 comments on commit d22e004

Please sign in to comment.