// Assigned a new MimeTypeExtensionGuesser instance by the constructor.
protected $mimeGuesser;
// Value received as the $fetchingErrorMessage constructor argument and stored as-is.
protected $fetchingErrorMessage;
// NOTE(review): not assigned by the constructor visible in this excerpt - presumably set elsewhere; confirm.
protected $eventDispatcher;
// Constructor flag (default false); the entry headers are only stored from
// $content['all_headers'] when this is truthy.
+ protected $storeArticleHeaders;
/**
 * Store the injected collaborators and configuration on the instance.
 *
 * NOTE(review): $validator is accepted but no assignment for it appears in
 * the lines visible here - confirm it is stored elsewhere or intentionally unused.
 *
 * @param Graby              $graby
 * @param RuleBasedTagger    $tagger
 * @param ValidatorInterface $validator
 * @param LoggerInterface    $logger
 * @param mixed              $fetchingErrorMessage Stored unchanged on the instance
 * @param bool               $storeArticleHeaders  Optional flag, false by default, stored unchanged on the instance
 */
- public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage)
+ public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false)
{
$this->graby = $graby;
$this->tagger = $tagger;
$this->logger = $logger;
// The mime type guesser is instantiated here rather than injected.
$this->mimeGuesser = new MimeTypeExtensionGuesser();
$this->fetchingErrorMessage = $fetchingErrorMessage;
+ $this->storeArticleHeaders = $storeArticleHeaders;
}
/**
// so we'll be able to refetch it in the future
$content['url'] = !empty($content['url']) ? $content['url'] : $url;
+ // In at least one case (seen in tests), the entry url is empty here,
+ // so we set it from the $url provided in the updateEntry call.
+ // It is unclear in which other cases this property can be empty.
+ if (empty($entry->getUrl()) && !empty($url)) {
+ $entry->setUrl($url);
+ }
+
$this->stockEntry($entry, $content);
}
$date = $value;
// is it a timestamp?
- if (filter_var($date, FILTER_VALIDATE_INT) !== false) {
+ if (false !== filter_var($date, FILTER_VALIDATE_INT)) {
$date = '@' . $date;
}
}
}
+ /**
+ * Derive the host from the entry url and store it as the entry domain name.
+ *
+ * Nothing is stored when parse_url() reports a failure (false).
+ *
+ * @param Entry $entry
+ */
+ public function setEntryDomainName(Entry $entry)
+ {
+ $host = parse_url($entry->getUrl(), PHP_URL_HOST);
+
+ if (false === $host) {
+ return;
+ }
+
+ $entry->setDomainName($host);
+ }
+
+ /**
+ * Helper to set a default title using:
+ * - url basename, if applicable
+ * - hostname.
+ *
+ * parse_url() leaves out the "path" and "host" keys when the url lacks those
+ * components (and returns false for seriously malformed urls), so both
+ * lookups are guarded instead of reading possibly undefined indexes.
+ * When neither component is available the title is set to null, which is
+ * the value the unguarded lookups ended up with.
+ *
+ * @param Entry $entry
+ */
+ public function setDefaultEntryTitle(Entry $entry)
+ {
+ $parts = parse_url($entry->getUrl());
+
+ // isset() is also safe when parse_url() returned false.
+ $title = isset($parts['path']) ? pathinfo($parts['path'], PATHINFO_BASENAME) : '';
+
+ if (empty($title)) {
+ $title = isset($parts['host']) ? $parts['host'] : null;
+ }
+
+ $entry->setTitle($title);
+ }
+
/**
* Stock entry with fetched or imported content.
* Will fall back to OpenGraph data if available.
*/
private function stockEntry(Entry $entry, array $content)
{
- $entry->setUrl($content['url']);
-
- $domainName = parse_url($entry->getUrl(), PHP_URL_HOST);
- if (false !== $domainName) {
- $entry->setDomainName($domainName);
+ // When a redirection occurs while fetching an entry
+ // we move the original url in origin_url property if empty
+ // and set the entry url with the final value
+ if (!empty($content['url']) && $entry->getUrl() !== $content['url']) {
+ if (empty($entry->getOriginUrl())) {
+ $entry->setOriginUrl($entry->getUrl());
+ }
+ $entry->setUrl($content['url']);
}
+ $this->setEntryDomainName($entry);
+
if (!empty($content['title'])) {
$entry->setTitle($content['title']);
} elseif (!empty($content['open_graph']['og_title'])) {
$entry->setPublishedBy($content['authors']);
}
- if (!empty($content['all_headers'])) {
+ if (!empty($content['all_headers']) && $this->storeArticleHeaders) {
$entry->setHeaders($content['all_headers']);
}